summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmar Tumballi <amarts@redhat.com>2017-06-23 13:10:56 +0530
committerAtin Mukherjee <amukherj@redhat.com>2017-07-24 15:34:34 +0000
commitfebf5ed4848ad705a34413353559482417c61467 (patch)
tree081447d6844b0bb16622c6bfce9fbb680ad42549
parent0b3fec6924cad5c9f38941550ab4106972efa5cc (diff)
posix: option to handle the shared bricks for statvfs()
Currently the 'storage/posix' xlator has an option `export-statfs-size no`, which exports zero as the value of a few fields in `struct statvfs`. In the case of a backend brick shared between multiple brick processes, the values of these fields should be `field_value / number-of-bricks-at-node`. This way, even the 'min-free-disk' checks etc. at different layers are handled properly when the statfs() sys call is made. Fixes #241 Change-Id: I2e320e1fdcc819ab9173277ef3498201432c275f Signed-off-by: Amar Tumballi <amarts@redhat.com> Reviewed-on: https://review.gluster.org/17618 CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
-rw-r--r--tests/basic/posix/shared-statfs.t53
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h5
-rw-r--r--xlators/storage/posix/src/posix.c34
-rw-r--r--xlators/storage/posix/src/posix.h3
11 files changed, 186 insertions, 13 deletions
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
new file mode 100644
index 00000000000..8caa9fa2110
--- /dev/null
+++ b/tests/basic/posix/shared-statfs.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+#Test that statfs is not served from posix backend FS.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
+# Create a subdir in mountpoint and use that for volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
+TEST $CLI volume start $V0
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+# Keeping the size less than 200M mainly because XFS will use
+# some storage in brick to keep its own metadata.
+TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
+
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+# From the same mount point, share another 2 bricks with the volume
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 8d4ea13af95..c7b618745b3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -22,6 +22,7 @@
#include "glusterd-server-quorum.h"
#include "run.h"
#include "glusterd-volgen.h"
+#include "syscall.h"
#include <sys/signal.h>
/* misc */
@@ -1322,6 +1323,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
gf_boolean_t is_valid_add_brick = _gf_false;
+ struct statvfs brickstat = {0,};
this = THIS;
GF_ASSERT (this);
@@ -1396,6 +1398,21 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
if (ret)
goto out;
+ if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+ ret = sys_statvfs (brickinfo->path, &brickstat);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_STATVFS_FAILED,
+ "Failed to fetch disk utilization "
+ "from the brick (%s:%s). Please check the health of "
+ "the brick. Error code was %s",
+ brickinfo->hostname, brickinfo->path,
+ strerror (errno));
+
+ goto out;
+ }
+ brickinfo->statfs_fsid = brickstat.f_fsid;
+ }
/* hot tier bricks are added to head of brick list */
if (dict_get (dict, "attach-tier")) {
cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 14424d36890..2caa16c2eda 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -4901,6 +4901,14 @@
*/
#define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_STATVFS_FAILED (GLUSTERD_COMP_BASE + 607)
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 72b70f916c6..8eb301f040f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -352,6 +352,12 @@ gd_store_brick_snap_details_write (int fd, glusterd_brickinfo_t *brickinfo)
snprintf (value, sizeof(value), "%d", brickinfo->snap_status);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
value);
+ if (ret)
+ goto out;
+
+ memset (value, 0, sizeof (value));
+ snprintf (value, sizeof (value), "%lu", brickinfo->statfs_fsid);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_FSID, value);
out:
return ret;
@@ -2508,6 +2514,10 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
} else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
strncpy (brickinfo->brick_id, value,
sizeof (brickinfo->brick_id));
+ } else if (!strncmp (key,
+ GLUSTERD_STORE_KEY_BRICK_FSID,
+ strlen (GLUSTERD_STORE_KEY_BRICK_FSID))) {
+ gf_string2uint64 (value, &brickinfo->statfs_fsid);
} else {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_UNKNOWN_KEY, "Unknown key: %s",
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 3e31c965638..b515ca6c554 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -97,6 +97,7 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_BRICK_FSTYPE "fs-type"
#define GLUSTERD_STORE_KEY_BRICK_MNTOPTS "mnt-opts"
#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id"
+#define GLUSTERD_STORE_KEY_BRICK_FSID "brick-fsid"
#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6ff11a2e050..f1627df688f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -186,6 +186,32 @@ out:
return ret;
}
+/* This is going to be an O(n^2) operation, as we have to pick a brick,
+   make sure it belongs to this machine, and check whether another brick
+   belonging to this machine (if one exists) shares the same backend */
+static void
+gd_set_shared_brick_count (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *trav = NULL;
+
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+ brickinfo->fs_share_count = 0;
+ cds_list_for_each_entry (trav, &volinfo->bricks,
+ brick_list) {
+ if (!gf_uuid_compare (trav->uuid, MY_UUID) &&
+ (trav->statfs_fsid == brickinfo->statfs_fsid)) {
+ brickinfo->fs_share_count++;
+ }
+ }
+ }
+
+ return;
+}
+
int
glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,
int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *,
@@ -195,6 +221,8 @@ glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,
glusterd_volinfo_t *dup_volinfo = NULL;
int ret = 0;
+ gd_set_shared_brick_count (volinfo);
+
if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
ret = _brick_for_each (volinfo, NULL, data, fn);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 0a0668e9ea6..1ada7232f3e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1440,6 +1440,7 @@ static int
brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
{
+ char tmpstr[10] = {0,};
int ret = -1;
gf_boolean_t quota_enabled = _gf_true;
gf_boolean_t trash_enabled = _gf_false;
@@ -1491,6 +1492,9 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
if (quota_enabled || pgfid_feat || trash_enabled)
xlator_set_option (xl, "update-link-count-parent",
"on");
+
+ snprintf (tmpstr, sizeof (tmpstr), "%d", brickinfo->fs_share_count);
+ ret = xlator_set_option (xl, "shared-brick-count", tmpstr);
out:
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 7254e281497..b95b8a4e863 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2164,6 +2164,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
char *brick_mount_dir = NULL;
char key[PATH_MAX] = "";
char *address_family_str = NULL;
+ struct statvfs brickstat = {0,};
this = THIS;
GF_ASSERT (this);
@@ -2405,24 +2406,35 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
sizeof(brickinfo->mount_dir));
}
-#ifdef HAVE_BD_XLATOR
- if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)
- && brickinfo->vg[0]) {
- ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+ ret = sys_statvfs (brickinfo->path, &brickstat);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_INVALID_VG, "%s", msg);
+ gf_log ("brick-op", GF_LOG_ERROR, "Failed to fetch disk"
+ " utilization from the brick (%s:%s). Please "
+ "check health of the brick. Error code was %s",
+ brickinfo->hostname, brickinfo->path,
+ strerror (errno));
goto out;
}
+ brickinfo->statfs_fsid = brickstat.f_fsid;
- /* if anyone of the brick does not have thin
- support, disable it for entire volume */
- caps &= brickinfo->caps;
- } else {
- caps = 0;
- }
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_VG, "%s", msg);
+ goto out;
+ }
+ /* if anyone of the brick does not have thin
+ support, disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else {
+ caps = 0;
+ }
#endif
+ }
cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
brick = strtok_r (NULL, " \n", &saveptr);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index b2141853db4..3226ec24c0f 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -232,6 +232,11 @@ struct glusterd_brickinfo {
*/
uint16_t group;
uuid_t jbr_uuid;
+
+ /* Below are used for handling the case of multiple bricks sharing
+ the backend filesystem */
+ uint64_t statfs_fsid;
+ uint32_t fs_share_count;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index dc8a129cacb..92a2f3772cb 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3641,6 +3641,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
struct statvfs buf = {0, };
struct posix_private * priv = NULL;
+ int shared_by = 1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3665,6 +3666,16 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
goto out;
}
+ shared_by = priv->shared_brick_count;
+ if (shared_by > 1) {
+ buf.f_blocks /= shared_by;
+ buf.f_bfree /= shared_by;
+ buf.f_bavail /= shared_by;
+ buf.f_files /= shared_by;
+ buf.f_ffree /= shared_by;
+ buf.f_favail /= shared_by;
+ }
+
if (!priv->export_statfs) {
buf.f_blocks = 0;
buf.f_bfree = 0;
@@ -6971,7 +6982,7 @@ int
reconfigure (xlator_t *this, dict_t *options)
{
int ret = -1;
-struct posix_private *priv = NULL;
+ struct posix_private *priv = NULL;
int32_t uid = -1;
int32_t gid = -1;
char *batch_fsync_mode_str = NULL;
@@ -7039,6 +7050,9 @@ struct posix_private *priv = NULL;
options, uint32, out);
posix_spawn_health_check_thread (this);
+ GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count,
+ options, int32, out);
+
ret = 0;
out:
return ret;
@@ -7573,6 +7587,17 @@ init (xlator_t *this)
}
}
#endif
+ _private->shared_brick_count = 1;
+ ret = dict_get_int32 (this->options, "shared-brick-count",
+ &_private->shared_brick_count);
+ if (ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ P_MSG_INVALID_OPTION_VAL,
+ "'shared-brick-count' takes only integer "
+ "values");
+ goto out;
+ }
+
this->private = (void *)_private;
op_ret = posix_handle_init (this);
@@ -7863,5 +7888,12 @@ struct volume_options options[] = {
"\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n"
},
#endif
+ { .key = {"shared-brick-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1",
+ .description = "Number of bricks sharing the same backend export."
+ " Useful for displaying the proper usable size through statvfs() "
+ "call (df command)",
+ },
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 480566a5340..81158266111 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -176,6 +176,9 @@ struct posix_private {
} xattr_user_namespace;
#endif
+ /* Option to handle the case of multiple bricks exported from the
+ same backend. Particularly useful for the brick-splitting feature. */
+ int32_t shared_brick_count;
};
typedef struct {