From 7b3971ad0152eb1bb89a982333970118a6bd4922 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K
Date: Wed, 4 Sep 2019 12:06:34 +0530
Subject: cluster/ec: Fail fsync/flush for files on update size/version failure

Problem:
If update size/version is not successful on the file, updates on the
same stripe could lead to data corruptions if the earlier un-aligned
write is not successful on all the bricks. Application won't have
any knowledge of this because update size/version happens in the
background.

Fix:
Fail fsync/flush on fds that are opened before update-size-version
went bad.

fixes: bz#1748836
Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492
Signed-off-by: Pranith Kumar K
---
 tests/basic/ec/ec-badfd.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/basic/ec/ec-badfd.t |  26 ++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 tests/basic/ec/ec-badfd.c
 create mode 100755 tests/basic/ec/ec-badfd.t

(limited to 'tests/basic')

diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
new file mode 100644
index 00000000000..8be23c10eaf
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+    int ret = -1;
+
+    iov->iov_base = malloc(count + 1);
+    if (iov->iov_base == NULL) {
+        return ret;
+    } else {
+        iov->iov_len = count;
+        ret = 0;
+    }
+    memset(iov->iov_base, fillchar, count);
+    memset(iov->iov_base + count, '\0', 1);
+
+    return ret;
+}
+
+int
+write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+    ssize_t ret = -1;
+    int flags = O_RDWR;
+    struct iovec iov = {0};
+
+    ret = fill_iov(&iov, 'a', char_count);
+    if (ret) {
+        fprintf(stderr, "failed to create iov");
+        goto out;
+    }
+
+    ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+out:
+    if (ret < 0) {
+        fprintf(stderr, "glfs_pwritev failed, %d", errno);
+    }
+    return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+    glfs_t *fs = NULL;
+    glfs_fd_t *fd = NULL;
+    int ret = 1;
+    char volume_cmd[4096] = {0};
+
+    if (argc != 4) {
+        fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+        return 1;
+    }
+
+    fs = glfs_new(argv[2]);
+    if (!fs) {
+        fprintf(stderr, "glfs_new: returned NULL\n");
+        return 1;
+    }
+
+    ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+    if (ret != 0) {
+        fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+        goto out;
+    }
+    ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
+    if (ret != 0) {
+        fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+        goto out;
+    }
+    ret = glfs_init(fs);
+    if (ret != 0) {
+        fprintf(stderr, "glfs_init: returned %d\n", ret);
+        goto out;
+    }
+
+    fd = glfs_open(fs, argv[3], O_RDWR);
+    if (fd == NULL) {
+        fprintf(stderr, "glfs_open: returned NULL\n");
+        goto out;
+    }
+
+    ret = write_sync(fs, fd, 16);
+    if (ret < 0) {
+        fprintf(stderr, "write_sync failed\n");
+    }
+
+    snprintf(volume_cmd, sizeof(volume_cmd),
+             "gluster --mode=script volume stop %s", argv[2]);
+    /*Stop the volume so that update-size-version fails*/
+    system(volume_cmd);
+    sleep(8); /* 3 seconds more than eager-lock-timeout*/
+    snprintf(volume_cmd, sizeof(volume_cmd),
+             "gluster --mode=script volume start %s", argv[2]);
+    system(volume_cmd);
+    sleep(8); /*wait for bricks to come up*/
+    ret = glfs_fsync(fd, NULL, NULL);
+    if (ret == 0) {
+        fprintf(stderr, "fsync succeeded on a BADFD\n");
+        exit(1);
+    }
+
+    ret = glfs_close(fd);
+    if (ret == 0) {
+        fprintf(stderr, "flush succeeded on a BADFD\n");
+        exit(1);
+    }
+    ret = 0;
+
+out:
+    unlink("/tmp/ec-badfd.log");
+    glfs_fini(fs);
+
+    return ret;
+}
diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
new file mode 100755
index 00000000000..56feb47f115
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-badfd $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-badfd
+
+cleanup;
--
cgit