summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src
diff options
context:
space:
mode:
authorM. Mohan Kumar <mohan@in.ibm.com>2013-11-13 22:44:42 +0530
committerAnand Avati <avati@redhat.com>2013-11-13 11:38:42 -0800
commit48c40e1a42efe1b59126406084821947d139dd0e (patch)
tree74959ecda9b9bd56c85e0e32991c11c06b022296 /xlators/mgmt/glusterd/src
parent15a8ecd9b3eedf80881bd3dba81f16b7d2cb7c97 (diff)
bd: posix/multi-brick support to BD xlator
Current BD xlator (block backend) has a few limitations such as * Creation of directories not supported * Supports only single brick * Does not use extended attributes (and client gfid) like posix xlator * Creation of special files (symbolic links, device nodes etc) not supported Basic limitation of not allowing directory creation is blocking oVirt/VDSM to consume BD xlator as part of Gluster domain since VDSM creates multi-level directories when GlusterFS is used as storage backend for storing VM images. To overcome these limitations a new BD xlator with following improvements is suggested. * New hybrid BD xlator that handles both regular files and block device files * The volume will have both POSIX and BD bricks. Regular files are created on POSIX bricks, block devices are created on the BD brick (VG) * BD xlator leverages exiting POSIX xlator for most POSIX calls and hence sits above the POSIX xlator * Block device file is differentiated from regular file by an extended attribute * The xattr 'user.glusterfs.bd' (BD_XATTR) plays a role in mapping a posix file to Logical Volume (LV). * When a client sends a request to set BD_XATTR on a posix file, a new LV is created and mapped to posix file. So every block device will have a representative file in POSIX brick with 'user.glusterfs.bd' (BD_XATTR) set. * Here after all operations on this file results in LV related operations. For example opening a file that has BD_XATTR set results in opening the LV block device, reading results in reading the corresponding LV block device. When BD xlator gets request to set BD_XATTR via setxattr call, it creates a LV and information about this LV is placed in the xattr of the posix file. xattr "user.glusterfs.bd" used to identify that posix file is mapped to BD. Usage: Server side: [root@host1 ~]# gluster volume create bdvol host1:/storage/vg1_info?vg1 host2:/storage/vg2_info?vg2 It creates a distributed gluster volume 'bdvol' with Volume Group vg1 using posix brick /storage/vg1_info in host1 and Volume Group vg2 using /storage/vg2_info in host2. [root@host1 ~]# gluster volume start bdvol Client side: [root@node ~]# mount -t glusterfs host1:/bdvol /media [root@node ~]# touch /media/posix It creates regular posix file 'posix' in either host1:/vg1 or host2:/vg2 brick [root@node ~]# mkdir /media/image [root@node ~]# touch /media/image/lv1 It also creates regular posix file 'lv1' in either host1:/vg1 or host2:/vg2 brick [root@node ~]# setfattr -n "user.glusterfs.bd" -v "lv" /media/image/lv1 [root@node ~]# Above setxattr results in creating a new LV in corresponding brick's VG and it sets 'user.glusterfs.bd' with value 'lv:<default-extent-size' [root@node ~]# truncate -s5G /media/image/lv1 It results in resizig LV 'lv1'to 5G New BD xlator code is placed in xlators/storage/bd directory. Also add volume-uuid to the VG so that same VG can't be used for other bricks/volumes. After deleting a gluster volume, one has to manually remove the associated tag using vgchange <vg-name> --deltag <trusted.glusterfs.volume-id:<volume-id>> Changes from previous version V5: * Removed support for delayed deleting of LVs Changes from previous version V4: * Consolidated the patches * Removed usage of BD_XATTR_SIZE and consolidated it in BD_XATTR. Changes from previous version V3: * Added support in FUSE to support full/linked clone * Added support to merge snapshots and provide information about origin * bd_map xlator removed * iatt structure used in inode_ctx. iatt is cached and updated during fsync/flush * aio support * Type and capabilities of volume are exported through getxattr Changes from version 2: * Used inode_context for caching BD size and to check if loc/fd is BD or not. * Added GlusterFS server offloaded copy and snapshot through setfattr FOP. As part of this libgfapi is modified. * BD xlator supports stripe * During unlinking if a LV file is already opened, its added to delete list and bd_del_thread tries to delete from this list when a last reference to that file is closed. Changes from previous version: * gfid is used as name of LV * ? is used to specify VG name for creating BD volume in volume create, add-brick. gluster volume create volname host:/path?vg * open-behind issue is fixed * A replicate brick can be added dynamically and LVs from source brick are replicated to destination brick * A distribute brick can be added dynamically and rebalance operation distributes existing LVs/files to the new brick * Thin provisioning support added. * bd_map xlator support retained * setfattr -n user.glusterfs.bd -v "lv" creates a regular LV and setfattr -n user.glusterfs.bd -v "thin" creates thin LV * Capability and backend information added to gluster volume info (and --xml) so that management tools can exploit BD xlator. * tracing support for bd xlator added TODO: * Add support to display snapshots for a given LV * Display posix filename for list-origin instead of gfid Change-Id: I00d32dfbab3b7c806e0841515c86c3aa519332f2 BUG: 1028672 Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Reviewed-on: http://review.gluster.org/4809 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c81
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c149
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h7
11 files changed, 367 insertions, 1 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index 17767d7ca92..a6f49ae01b1 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -2,6 +2,9 @@ xlator_LTLIBRARIES = glusterd.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
glusterd_la_LDFLAGS = -module -avoid-version
+if ENABLE_BD_XLATOR
+glusterd_la_LDFLAGS += -llvm2app
+endif
glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index e24edb2d594..cc425353516 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1025,6 +1025,8 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_gsync_status_temp_t param = {0, };
gf_boolean_t restart_needed = 0;
+ char msg[1024] __attribute__((unused)) = {0, };
+ int caps = 0;
GF_ASSERT (volinfo);
@@ -1105,12 +1107,30 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0])
+ caps = CAPS_BD | CAPS_THIN;
+#endif
while (i <= count) {
ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
&brickinfo);
if (ret)
goto out;
+#ifdef HAVE_BD_XLATOR
+ /* Check for VG/thin pool if its BD volume */
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg);
+ goto out;
+ }
+ /* if anyone of the brick does not have thin support,
+ disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else
+ caps = 0;
+#endif
if (uuid_is_null (brickinfo->uuid)) {
ret = glusterd_resolve_brick (brickinfo);
@@ -1147,7 +1167,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
dict_foreach (volinfo->gsync_slaves,
_glusterd_restart_gsync_session, &param);
}
-
+ volinfo->caps = caps;
out:
GF_FREE (free_ptr1);
GF_FREE (free_ptr2);
@@ -1321,6 +1341,18 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
}
if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 1, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s",
+ msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+ }
+#endif
+
ret = glusterd_validate_and_create_brickpath (brickinfo,
volinfo->volume_id,
op_errstr, is_force);
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index e545fc2120d..181b8fcf1a9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -50,6 +50,10 @@
#include "globals.h"
#include "glusterd-syncop.h"
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_event_t event,
void *data, rpc_clnt_notify_t notify_fn)
@@ -395,6 +399,39 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+#ifdef HAVE_BD_XLATOR
+ if (volinfo->caps) {
+ snprintf (key, 256, "volume%d.xlator0", count);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ if (volinfo->caps & CAPS_BD)
+ snprintf (buf, 256, "BD");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+
+ if (volinfo->caps & CAPS_THIN) {
+ snprintf (key, 256, "volume%d.xlator0.caps0", count);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "thin");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+ }
+#endif
+
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
char brick[1024] = {0,};
char brick_uuid[64] = {0,};
@@ -414,6 +451,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+#ifdef HAVE_BD_XLATOR
+ if (volinfo->caps & CAPS_BD) {
+ snprintf (key, 256, "volume%d.vg%d", count, i);
+ snprintf (brick, 1024, "%s", brickinfo->vg);
+ buf = gf_strdup (brick);
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret)
+ goto out;
+ }
+#endif
i++;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 5fe05069de8..498c869a025 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -286,4 +286,7 @@ glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag);
int
glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
dict_t *op_ctx);
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg);
+
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 8b658ae3037..5902589f4b6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -241,6 +241,11 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
if (ret)
goto out;
+ if (!brickinfo->vg[0])
+ goto out;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ brickinfo->vg);
out:
gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -581,6 +586,13 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
buf);
if (ret)
goto out;
+ if (volinfo->caps) {
+ snprintf (buf, sizeof (buf), "%d", volinfo->caps);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CAPS,
+ buf);
+ if (ret)
+ goto out;
+ }
out:
if (ret)
@@ -1538,6 +1550,11 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
gf_string2int (value, &brickinfo->decommissioned);
+ } else if (!strncmp (key,
+ GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) {
+ strncpy (brickinfo->vg, value,
+ sizeof (brickinfo->vg));
} else {
gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
key);
@@ -1856,6 +1873,9 @@ glusterd_store_retrieve_volume (char *volname)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
volinfo->client_op_version = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS,
+ strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) {
+ volinfo->caps = atoi (value);
} else {
if (is_key_glusterd_hooks_friendly (key)) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index facb964fa72..ce1f766b1de 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -64,11 +64,13 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port"
#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
+#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg"
#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
#define GLUSTERD_STORE_KEY_PEER_STATE "state"
+#define GLUSTERD_STORE_KEY_VOL_CAPS "caps"
#define glusterd_for_each_entry(entry, dir) \
do {\
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d4f33f2cefc..f0445cf0b72 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -49,6 +49,11 @@
#include <unistd.h>
#include <fnmatch.h>
#include <sys/statvfs.h>
+#include <ifaddrs.h>
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
#ifdef GF_LINUX_HOST_OS
#include <mntent.h>
@@ -622,6 +627,7 @@ glusterd_brickinfo_new_from_brick (char *brick,
char *path = NULL;
char *tmp_host = NULL;
char *tmp_path = NULL;
+ char *vg = NULL;
GF_ASSERT (brick);
GF_ASSERT (brickinfo);
@@ -640,6 +646,17 @@ glusterd_brickinfo_new_from_brick (char *brick,
if (ret)
goto out;
+#ifdef HAVE_BD_XLATOR
+ vg = strchr (path, '?');
+ /* ? is used as a delimiter for vg */
+ if (vg) {
+ strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1);
+ *vg = '\0';
+ }
+ new_brickinfo->caps = CAPS_BD;
+#else
+ vg = NULL; /* Avoid compiler warnings when BD not enabled */
+#endif
ret = gf_canonicalize_path (path);
if (ret)
goto out;
@@ -743,6 +760,62 @@ out:
return available;
}
+#ifdef HAVE_BD_XLATOR
+/*
+ * Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in
+ * the brick VG. It is used to avoid using same VG for another brick.
+ * @volume-id - gfid, @brick - brick info, @msg - Error message returned
+ * to the caller
+ */
+int
+glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick,
+ char *msg, int msg_size)
+{
+ lvm_t handle = NULL;
+ vg_t vg = NULL;
+ char *uuid = NULL;
+ int ret = -1;
+
+ gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY,
+ uuid_utoa (volume_id));
+ if (!uuid) {
+ snprintf (msg, sizeof(*msg), "Could not allocate memory "
+ "for tag");
+ return -1;
+ }
+
+ handle = lvm_init (NULL);
+ if (!handle) {
+ snprintf (msg, sizeof(*msg), "lvm_init failed");
+ goto out;
+ }
+
+ vg = lvm_vg_open (handle, brick->vg, "w", 0);
+ if (!vg) {
+ snprintf (msg, sizeof(*msg), "Could not open VG %s",
+ brick->vg);
+ goto out;
+ }
+
+ if (lvm_vg_add_tag (vg, uuid) < 0) {
+ snprintf (msg, sizeof(*msg), "Could not set tag %s for "
+ "VG %s", uuid, brick->vg);
+ goto out;
+ }
+ lvm_vg_write (vg);
+ ret = 0;
+out:
+ GF_FREE (uuid);
+
+ if (vg)
+ lvm_vg_close (vg);
+ if (handle)
+ lvm_quit (handle);
+
+ return ret;
+}
+#endif
+
int
glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
uuid_t volume_id, char **op_errstr,
@@ -825,6 +898,14 @@ glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
}
}
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg,
+ sizeof(msg));
+ if (ret)
+ goto out;
+ }
+#endif
ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname,
brickinfo->path, volume_id,
op_errstr, is_force);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 6bf14bc3daa..51fba4da343 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -594,6 +594,8 @@ get_server_xlator (char *xlator)
subvol = GF_XLATOR_MARKER;
if (strcmp (xlator, "io-stats") == 0)
subvol = GF_XLATOR_IO_STATS;
+ if (strcmp (xlator, "bd") == 0)
+ subvol = GF_XLATOR_BD;
return subvol;
}
@@ -1456,7 +1458,26 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
"posix");
if (ret)
return -1;
+#ifdef HAVE_BD_XLATOR
+ if (*brickinfo->vg != '\0') {
+ /* Now add BD v2 xlator if volume is BD type */
+ xl = volgen_graph_add (graph, "storage/bd", volname);
+ if (!xl)
+ return -1;
+
+ ret = xlator_set_option (xl, "device", "vg");
+ if (ret)
+ return -1;
+ ret = xlator_set_option (xl, "export", brickinfo->vg);
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "bd");
+ if (ret)
+ return -1;
+ }
+#endif
xl = volgen_graph_add (graph, "features/changelog", volname);
if (!xl)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
index 0c1a76c123d..31bfe980dee 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
@@ -75,6 +75,7 @@ typedef enum {
GF_XLATOR_INDEX,
GF_XLATOR_MARKER,
GF_XLATOR_IO_STATS,
+ GF_XLATOR_BD,
GF_XLATOR_NONE,
} glusterd_server_xlator_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 034004dbd9e..a2bd7334c93 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -12,6 +12,10 @@
#include "config.h"
#endif
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
#include "common-utils.h"
#include "syscall.h"
#include "cli1-xdr.h"
@@ -26,6 +30,7 @@
#define glusterd_op_start_volume_args_get(dict, volname, flags) \
glusterd_op_stop_volume_args_get (dict, volname, flags)
+
int
__glusterd_handle_create_volume (rpcsvc_request_t *req)
{
@@ -599,6 +604,101 @@ glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req)
__glusterd_handle_cli_statedump_volume);
}
+#ifdef HAVE_BD_XLATOR
+/*
+ * Validates if given VG in the brick exists or not. Also checks if VG has
+ * GF_XATTR_VOL_ID_KEY tag set to avoid using same VG for multiple bricks.
+ * Tag is checked only during glusterd_op_stage_create_volume. Tag is set during
+ * glusterd_validate_and_create_brickpath().
+ * @brick - brick info, @check_tag - check for VG tag or not
+ * @msg - Error message to return to caller
+ */
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg)
+{
+ lvm_t handle = NULL;
+ vg_t vg = NULL;
+ char *vg_name = NULL;
+ int retval = 0;
+ char *p = NULL;
+ char *ptr = NULL;
+ struct dm_list *dm_lvlist = NULL;
+ struct dm_list *dm_seglist = NULL;
+ struct lvm_lv_list *lv_list = NULL;
+ struct lvm_property_value prop = {0, };
+ struct lvm_lvseg_list *seglist = NULL;
+ struct dm_list *taglist = NULL;
+ struct lvm_str_list *strl = NULL;
+
+ handle = lvm_init (NULL);
+ if (!handle) {
+ sprintf (msg, "lvm_init failed, could not validate vg");
+ return -1;
+ }
+ if (*brick->vg == '\0') { /* BD xlator has vg in brick->path */
+ p = gf_strdup (brick->path);
+ vg_name = strtok_r (p, "/", &ptr);
+ } else
+ vg_name = brick->vg;
+
+ vg = lvm_vg_open (handle, vg_name, "r", 0);
+ if (!vg) {
+ sprintf (msg, "no such vg: %s", vg_name);
+ retval = -1;
+ goto out;
+ }
+ if (!check_tag)
+ goto next;
+
+ taglist = lvm_vg_get_tags (vg);
+ if (!taglist)
+ goto next;
+
+ dm_list_iterate_items (strl, taglist) {
+ if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY,
+ strlen (GF_XATTR_VOL_ID_KEY))) {
+ sprintf (msg, "VG %s is already part of"
+ " a brick", vg_name);
+ retval = -1;
+ goto out;
+ }
+ }
+next:
+
+ brick->caps = CAPS_BD;
+
+ dm_lvlist = lvm_vg_list_lvs (vg);
+ if (!dm_lvlist)
+ goto out;
+
+ dm_list_iterate_items (lv_list, dm_lvlist) {
+ dm_seglist = lvm_lv_list_lvsegs (lv_list->lv);
+ dm_list_iterate_items (seglist, dm_seglist) {
+ prop = lvm_lvseg_get_property (seglist->lvseg,
+ "segtype");
+ if (!prop.is_valid || !prop.value.string)
+ continue;
+ if (!strcmp (prop.value.string, "thin-pool")) {
+ brick->caps |= CAPS_THIN;
+ gf_log (THIS->name, GF_LOG_INFO, "Thin Pool "
+ "\"%s\" will be used for thin LVs",
+ lvm_lv_get_name (lv_list->lv));
+ break;
+ }
+ }
+ }
+
+ retval = 0;
+out:
+ if (vg)
+ lvm_vg_close (vg);
+ lvm_quit (handle);
+ if (p)
+ GF_FREE (p);
+ return retval;
+}
+#endif
+
/* op-sm */
int
glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr)
@@ -712,6 +812,11 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr)
}
if (!uuid_compare (brick_info->uuid, MY_UUID)) {
+ if (brick_info->vg[0]) {
+ ret = glusterd_is_valid_vg (brick_info, 1, msg);
+ if (ret)
+ goto out;
+ }
ret = glusterd_validate_and_create_brickpath (brick_info,
volume_uuid, op_errstr,
is_force);
@@ -809,6 +914,7 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
uuid_t volume_id = {0,};
char volid[50] = {0,};
char xattr_volid[50] = {0,};
+ int caps = 0;
this = THIS;
GF_ASSERT (this);
@@ -847,6 +953,7 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
}
}
+
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
ret = glusterd_resolve_brick (brickinfo);
if (ret) {
@@ -902,8 +1009,24 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
ret = -1;
goto out;
}
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0])
+ caps = CAPS_BD | CAPS_THIN;
+
+ /* Check for VG/thin pool if its BD volume */
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret)
+ goto out;
+ /* if anyone of the brick does not have thin support,
+ disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else
+ caps = 0;
+#endif
}
+ volinfo->caps = caps;
ret = 0;
out:
if (ret && (msg[0] != '\0')) {
@@ -1315,6 +1438,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
char *str = NULL;
char *username = NULL;
char *password = NULL;
+ int caps = 0;
+ char msg[1024] __attribute__((unused)) = {0, };
this = THIS;
GF_ASSERT (this);
@@ -1477,6 +1602,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
+ caps = CAPS_BD | CAPS_THIN;
while ( i <= count) {
ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
@@ -1489,6 +1615,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
brickinfo->hostname, brickinfo->path);
goto out;
}
+
+#ifdef HAVE_BD_XLATOR
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ msg);
+ goto out;
+ }
+
+ /* if anyone of the brick does not have thin
+ support, disable it for entire volume */
+ caps &= brickinfo->caps;
+
+
+ } else
+ caps = 0;
+ }
+#endif
+
list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
brick = strtok_r (NULL, " \n", &saveptr);
i++;
@@ -1496,6 +1643,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
gd_update_volume_op_versions (volinfo);
+ volinfo->caps = caps;
+
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
glusterd_store_delete_volume (volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 8d8f8d4e07e..6423d5a8154 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -176,6 +176,8 @@ struct glusterd_brickinfo {
gf_brick_status_t status;
struct rpc_clnt *rpc;
int decommissioned;
+ char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */
+ int caps; /* Capability */
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
@@ -231,6 +233,10 @@ struct _auth {
typedef struct _auth auth_t;
+/* Capabilities of xlator */
+#define CAPS_BD 0x00000001
+#define CAPS_THIN 0x00000010
+
struct glusterd_rebalance_ {
gf_defrag_status_t defrag_status;
uint64_t rebalance_files;
@@ -300,6 +306,7 @@ struct glusterd_volinfo_ {
xlator_t *xl;
gf_boolean_t memory_accounting;
+ int caps; /* Capability */
int op_version;
int client_op_version;