summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2017-12-06 07:59:53 +0530
committerPranith Kumar K <pkarampu@redhat.com>2017-12-22 14:55:29 +0530
commitc96a1338fe8139d07a0aa1bc40f0843d033f0324 (patch)
treedda04fd3366565b2524a85091d29c3a55a5fb544
parent85d321b21c0e982866fd2c83b91b71de458dd043 (diff)
cluster/ec: Change [f]getxattr to parallel-dispatch-one
At the moment in EC, [f]getxattr operations wait to acquire a lock while other operations are in progress even when it is in the same mount with a lock on the file/directory. This happens because [f]getxattr operations follow the model where the operation is wound on 'k' of the bricks and are matched to make sure the data returned is same on all of them. This consistency check requires that no other operations are on-going while [f]getxattr operations are wound to the bricks. We can perform [f]getxattr in another way as well, where we find the good_mask from the lock that is already granted and wind the operation on any one of the good bricks and unwind the answer after adjusting size/blocks to the parent xlator. Since we are taking into account good_mask, the reply we get will either be before or after a possible on-going operation. Using this method, the operation doesn't need to depend on completion of on-going operations which could be taking long time (In case of some slow disks and writes are in progress etc). Thus we reduce the time to serve [f]getxattr requests. I changed [f]getxattr to dispatch-one and added extra logic in ec_link_has_lock_conflict() to not have any conflicts for fops with EC_MINIMUM_ONE as fop->minimum to achieve the effect described above. Modified scripts to make sure READ fop is received in EC to trigger heals. Updates gluster/glusterfs#368 Change-Id: I3b4ebf89181c336b7b8d5471b0454f016cdaf296 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r--libglusterfs/src/glusterfs.h3
-rw-r--r--tests/basic/ec/ec-fast-fgetxattr.c133
-rwxr-xr-xtests/basic/ec/ec-fast-fgetxattr.t40
-rw-r--r--xlators/cluster/ec/src/ec-common.c7
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h2
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c15
-rw-r--r--xlators/cluster/ec/src/ec.c4
7 files changed, 199 insertions, 5 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 4dee7f00ce6..75c12dc6d3d 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -72,6 +72,9 @@
#define FNM_EXTMATCH 0
#endif
+/*gets max-offset on all architectures correctly*/
+#define GF_OFF_MAX ((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL)
+
#define GLUSTERD_MAX_SNAP_NAME 255
#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 10
#define ZR_MOUNTPOINT_OPT "mountpoint"
diff --git a/tests/basic/ec/ec-fast-fgetxattr.c b/tests/basic/ec/ec-fast-fgetxattr.c
new file mode 100644
index 00000000000..f117f5350ab
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int cbk_complete = 0;
+ssize_t cbk_ret_val = 0;
+int
+fill_iov (struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = calloc (count + 1, sizeof(fillchar));
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset (iov->iov_base, fillchar, count);
+ memset (iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+void
+write_async_cbk (glfs_fd_t *fd, ssize_t ret, void *cookie)
+{
+
+ if (ret < 0) {
+ fprintf (stderr, "glfs_write failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async (glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+
+
+ ret = fill_iov (&iov, 'a', char_count);
+ if (ret) {
+ fprintf (stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev_async (glfd, &iov, 1, 0, flags, write_async_cbk,
+ NULL);
+out:
+ if (ret < 0) {
+ fprintf (stderr, "glfs_pwritev async failed");
+ }
+ return ret;
+
+}
+
+int
+main (int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char buf[1024] = {0};
+
+ if (argc != 4) {
+ fprintf (stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new (argv[2]);
+ if (!fs) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server (fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_set_volfile_server: retuned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging (fs, "/tmp/ec-fgetxattr.log", 7);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init (fs);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open (fs, argv[3], O_RDWR | O_TRUNC);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_async (fs, fd, 16);
+ if (ret) {
+ fprintf (stderr, "write_async failed\n");
+ }
+
+ sleep (1);
+ ret = glfs_fgetxattr (fd, "trusted.glusterfs.abc", buf, sizeof buf);
+ while (cbk_complete != 1) {
+ /* ret will be -ve as xattr doesn't exist, and fgetxattr should
+ * return waaaayyy before writev */
+ ret = 0;
+ sleep (1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf (stderr, "cbk_ret_val is -ve\n");
+ ret = -1;
+ }
+ glfs_close(fd);
+
+out:
+ unlink ("/tmp/ec-fgetxattr.log");
+ glfs_fini (fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-fast-fgetxattr.t b/tests/basic/ec/ec-fast-fgetxattr.t
new file mode 100755
index 00000000000..eb12fa4a0ba
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 brick-log-level DEBUG
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable read,write
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+# Perform two writes to make sure io-threads have enough threads to perform
+# things in parallel when the test execution happens.
+echo abc > $M0/file1 &
+echo abc > $M0/file2 &
+wait
+
+TEST build_tester $(dirname $0)/ec-fast-fgetxattr.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-fast-fgetxattr $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-fast-fgetxattr
+
+cleanup;
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 8ddda9b1370..eea1e7c7a36 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -53,6 +53,13 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
static gf_boolean_t
ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
{
+ /* Fops like access/stat won't have to worry what the other fops are
+ * modifying as the fop is wound only to one brick. So it can be
+ * executed in parallel*/
+ if (l1->fop->minimum == EC_MINIMUM_ONE ||
+ l2->fop->minimum == EC_MINIMUM_ONE)
+ return _gf_false;
+
if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
(l2->fop->flags & EC_FLAG_LOCK_SHARED))
return _gf_false;
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index a8f153a395d..71b8978abf8 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -152,7 +152,7 @@ ec_adjust_offset_up(ec_t *ec, off_t *value, gf_boolean_t scale)
} else {
/* Check if there has been an overflow. */
if ((off_t)tmp < 0) {
- tmp = (1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL;
+ tmp = GF_OFF_MAX;
tail = -tail;
}
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 33fd7f549bb..03690ab8e96 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -324,13 +324,23 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
- ec_dispatch_all(fop);
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ ec_dispatch_all(fop);
+ } else {
+ ec_dispatch_one(fop);
+ }
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
ec_handle_special_xattrs (fop);
- cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ } else {
+ if (ec_dispatch_one_retry (fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+ }
if (cbk != NULL) {
int32_t err;
@@ -1809,6 +1819,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
case EC_STATE_PREPARE_ANSWER:
cbk = ec_fop_prepare_answer(fop, _gf_true);
+
if (cbk != NULL) {
if (cbk->iatt[0].ia_type == IA_IFREG) {
ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1,
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 74709b0e2b4..c8beb2b56fc 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -864,7 +864,7 @@ ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
{
int error = 0;
ec_t *ec = this->private;
- int32_t minimum = EC_MINIMUM_MIN;
+ int32_t minimum = EC_MINIMUM_ONE;
if (name && strcmp (name, EC_XATTR_HEAL) != 0) {
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
@@ -901,7 +901,7 @@ ec_gf_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
- ec_fgetxattr (frame, this, -1, EC_MINIMUM_MIN, default_fgetxattr_cbk,
+ ec_fgetxattr (frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk,
NULL, fd, name, xdata);
return 0;
out: