From 593150979399f7f11e580591eab4b032bb0228ac Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Wed, 16 Oct 2019 13:06:29 +0530 Subject: afr: lock healing changes Implements lock healing for gluster-block fencing use case. If mandatory lock is enabled: - Add domain lock/unlock to afr_lk fop. - Maintain a list of locks to be healed in afr_private_t. - Add lock to the list if afr_lk(F_SETLK or F_SETLKW) was sucessful. - Remove it from the list during afr_lk(F_UNLCK). - On child_down, mark lock as needing heal on that child. If lock is lost on quorum no. of bricks, remove it from the list and mark fd bad. - For fds marked as bad, fail the subsequent fd based fops. - On parent up, traverse the list and heal the locks IFF the client is the lk owner and has quorum. (shd does not heal any locks). updates: #613 Change-Id: I03c46ceaea30f5e6236d5ec13f71d843d827f1bc Signed-off-by: Ravishankar N --- tests/basic/fencing/afr-lock-heal-advanced.c | 227 +++++++++++++++++++++++++++ tests/basic/fencing/afr-lock-heal-advanced.t | 104 ++++++++++++ tests/basic/fencing/afr-lock-heal-basic.c | 182 +++++++++++++++++++++ tests/basic/fencing/afr-lock-heal-basic.t | 99 ++++++++++++ 4 files changed, 612 insertions(+) create mode 100644 tests/basic/fencing/afr-lock-heal-advanced.c create mode 100644 tests/basic/fencing/afr-lock-heal-advanced.t create mode 100644 tests/basic/fencing/afr-lock-heal-basic.c create mode 100644 tests/basic/fencing/afr-lock-heal-basic.t (limited to 'tests/basic') diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c new file mode 100644 index 00000000000..e202ccd5b29 --- /dev/null +++ b/tests/basic/fencing/afr-lock-heal-advanced.c @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock" + +FILE *logfile_fp; + +#define LOG_ERR(func, err) \ + do { \ + if (!logfile_fp) { \ + fprintf(stderr, "%\n%d %s 
: returned error (%s)\n", __LINE__, \ + func, strerror(err)); \ + fflush(stderr); \ + } else { \ + fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \ + func, strerror(err)); \ + fflush(logfile_fp); \ + } \ + } while (0) + +glfs_t * +setup_client(char *hostname, char *volname, char *log_file) +{ + int ret = 0; + glfs_t *fs = NULL; + + fs = glfs_new(volname); + if (!fs) { + fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n", + strerror(errno)); + goto error; + } + + ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n", + ret, strerror(errno)); + goto error; + } + + ret = glfs_set_logging(fs, log_file, 7); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n", + ret, strerror(errno)); + goto error; + } + + ret = glfs_init(fs); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret, + strerror(errno)); + goto error; + } + +out: + return fs; +error: + return NULL; +} + +glfs_fd_t * +open_file(glfs_t *fs, char *fname) +{ + glfs_fd_t *fd = NULL; + + fd = glfs_creat(fs, fname, O_CREAT, 0644); + if (!fd) { + LOG_ERR("glfs_creat", errno); + goto out; + } +out: + return fd; +} + +int +acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd) +{ + struct flock lock; + int ret = 0; + + /* initialize lock */ + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 100; + + ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0); + if (ret < 0) { + LOG_ERR("glfs_fsetxattr", errno); + ret = -1; + goto out; + } + + /* take a write mandatory lock */ + ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY); + if (ret) { + LOG_ERR("glfs_file_lock", errno); + ret = -1; + goto out; + } + +out: + return ret; +} + +int +perform_test(glfs_t *fs, char *file1, char *file2) +{ + int ret = 0; + glfs_fd_t *fd1 = NULL; + glfs_fd_t *fd2 = NULL; + char *buf = "0123456789"; + + fd1 = 
open_file(fs, file1); + if (!fd1) { + ret = -1; + goto out; + } + fd2 = open_file(fs, file2); + if (!fd2) { + ret = -1; + goto out; + } + + /* Kill one brick from the .t.*/ + pause(); + + ret = acquire_mandatory_lock(fs, fd1); + if (ret) { + goto out; + } + ret = acquire_mandatory_lock(fs, fd2); + if (ret) { + goto out; + } + + /* Bring the brick up and let the locks heal. */ + pause(); + /*At this point, the .t would have killed and brought back 2 bricks, marking + * the fd bad.*/ + + ret = glfs_write(fd1, buf, 10, 0); + if (ret > 0) { + /* Write is supposed to fail with EBADFD*/ + LOG_ERR("glfs_write", ret); + goto out; + } + + ret = 0; +out: + if (fd1) + glfs_close(fd1); + if (fd2) + glfs_close(fd2); + return ret; +} + +static void +sigusr1_handler(int signo) +{ + /*Signal caught. Just continue with the execution.*/ +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + glfs_t *fs = NULL; + char *volname = NULL; + char log_file[100]; + char *hostname = NULL; + char *fname1 = NULL; + char *fname2 = NULL; + + if (argc != 7) { + fprintf(stderr, + "Expect following args %s " + " \n", + argv[0]); + return -1; + } + + hostname = argv[1]; + volname = argv[2]; + fname1 = argv[3]; + fname2 = argv[4]; + + /*Use SIGUSR1 and pause()as a means of hitting break-points this program + *when signalled from the .t test case.*/ + if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) { + LOG_ERR("SIGUSR1 handler error", errno); + exit(EXIT_FAILURE); + } + + sprintf(log_file, "%s/%s.%s.%s", argv[5], "lock-heal.c", argv[6], "log"); + logfile_fp = fopen(log_file, "w"); + if (!logfile_fp) { + fprintf(stderr, "\nfailed to open %s\n", log_file); + fflush(stderr); + return -1; + } + + sprintf(log_file, "%s/%s.%s.%s", argv[5], "glfs-client", argv[6], "log"); + fs = setup_client(hostname, volname, log_file); + if (!fs) { + LOG_ERR("setup_client", errno); + return -1; + } + + ret = perform_test(fs, fname1, fname2); + +error: + if (fs) { + /*glfs_fini(fs)*/; // glfs fini path is racy and 
crashes the program + } + + fclose(logfile_fp); + + return ret; +} diff --git a/tests/basic/fencing/afr-lock-heal-advanced.t b/tests/basic/fencing/afr-lock-heal-advanced.t new file mode 100644 index 00000000000..8a7a208db29 --- /dev/null +++ b/tests/basic/fencing/afr-lock-heal-advanced.t @@ -0,0 +1,104 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +function is_gfapi_program_alive() +{ + pid=$1 + ps -p $pid + if [ $? -eq 0 ] + then + echo "Y" + else + echo "N" + fi +} + +function get_active_lock_count { + brick=$1 + sdump=$(generate_brick_statedump $V0 $H0 $brick) + lock_count="$(grep ACTIVE $sdump| wc -l)" + echo "$lock_count" +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +EXPECT 'Created' volinfo_field $V0 'Status'; +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 locks.mandatory-locking forced +TEST $CLI volume set $V0 enforce-mandatory-lock on +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +logdir=`gluster --print-logdir` +TEST build_tester $(dirname $0)/afr-lock-heal-advanced.c -lgfapi -ggdb + +#------------------------------------------------------------------------------ +# Use more than 1 fd from same client so that list_for_each_* loops are executed more than once. +$(dirname $0)/afr-lock-heal-advanced $H0 $V0 "/FILE1" "/FILE2" $logdir C1& +client_pid=$! +TEST [ $client_pid ] + +TEST sleep 5 # By now, the client would have opened an fd on FILE1 and FILE2 and waiting for a SIGUSR1. +EXPECT "Y" is_gfapi_program_alive $client_pid + +# Kill brick-3 and let client-1 take lock on both files. +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST kill -SIGUSR1 $client_pid +# If program is still alive, glfs_file_lock() was a success. 
+EXPECT "Y" is_gfapi_program_alive $client_pid + +# Check lock is present on brick-1 and brick-2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}1 + +# Restart brick-3 and check that the lock has healed on it. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}2 + +#------------------------------------------------------------------------------ +# Kill same brick before heal completes the first time and check it completes the second time. +TEST $CLI volume set $V0 delay-gen locks +TEST $CLI volume set $V0 delay-gen.delay-duration 5000000 +TEST $CLI volume set $V0 delay-gen.delay-percentage 100 +TEST $CLI volume set $V0 delay-gen.enable finodelk + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST $CLI volume reset $V0 delay-gen +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 + +#------------------------------------------------------------------------------ +# Kill 2 bricks and bring it back. The fds must be marked bad. +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 + +# TODO: `gluster v statedump $V0 client localhost:$client_pid` is not working, +# so sleep for 20 seconds for the client to connect to connect to the bricks. 
+TEST sleep $CHILD_UP_TIMEOUT + +# Try to write to FILE1 from the .c; it must fail. +TEST kill -SIGUSR1 $client_pid +wait $client_pid +ret=$? +TEST [ $ret == 0 ] + +cleanup_tester $(dirname $0)/afr-lock-heal-advanced +cleanup; diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c new file mode 100644 index 00000000000..768c9e57181 --- /dev/null +++ b/tests/basic/fencing/afr-lock-heal-basic.c @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock" + +FILE *logfile_fp; + +#define LOG_ERR(func, err) \ + do { \ + if (!logfile_fp) { \ + fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \ + func, strerror(err)); \ + fflush(stderr); \ + } else { \ + fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \ + func, strerror(err)); \ + fflush(logfile_fp); \ + } \ + } while (0) + +glfs_t * +setup_client(char *hostname, char *volname, char *log_file) +{ + int ret = 0; + glfs_t *fs = NULL; + + fs = glfs_new(volname); + if (!fs) { + fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n", + strerror(errno)); + goto error; + } + + ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n", + ret, strerror(errno)); + goto error; + } + + ret = glfs_set_logging(fs, log_file, 7); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n", + ret, strerror(errno)); + goto error; + } + + ret = glfs_init(fs); + if (ret < 0) { + fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret, + strerror(errno)); + goto error; + } + +out: + return fs; +error: + return NULL; +} + +int +acquire_mandatory_lock(glfs_t *fs, char *fname) +{ + struct flock lock; + int ret = 0; + glfs_fd_t *fd = NULL; + + fd = glfs_creat(fs, fname, O_CREAT, 0644); + if (!fd) { + if (errno != EEXIST) { + 
LOG_ERR("glfs_creat", errno); + ret = -1; + goto out; + } + fd = glfs_open(fs, fname, O_RDWR | O_NONBLOCK); + if (!fd) { + LOG_ERR("glfs_open", errno); + ret = -1; + goto out; + } + } + + /* initialize lock */ + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 100; + + ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0); + if (ret < 0) { + LOG_ERR("glfs_fsetxattr", errno); + ret = -1; + goto out; + } + + pause(); + + /* take a write mandatory lock */ + ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY); + if (ret) { + LOG_ERR("glfs_file_lock", errno); + goto out; + } + + pause(); + +out: + if (fd) { + glfs_close(fd); + } + + return ret; +} + +static void +sigusr1_handler(int signo) +{ + /*Signal caught. Just continue with the execution.*/ +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + glfs_t *fs = NULL; + char *volname = NULL; + char log_file[100]; + char *hostname = NULL; + char *fname = NULL; + + if (argc != 6) { + fprintf(stderr, + "Expect following args %s \n", + argv[0]); + return -1; + } + + hostname = argv[1]; + volname = argv[2]; + fname = argv[3]; + + /*Use SIGUSR1 and pause()as a means of hitting break-points this program + *when signalled from the .t test case.*/ + if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) { + LOG_ERR("SIGUSR1 handler error", errno); + exit(EXIT_FAILURE); + } + + sprintf(log_file, "%s/%s.%s.%s", argv[4], "lock-heal-basic.c", argv[5], + "log"); + logfile_fp = fopen(log_file, "w"); + if (!logfile_fp) { + fprintf(stderr, "\nfailed to open %s\n", log_file); + fflush(stderr); + return -1; + } + + sprintf(log_file, "%s/%s.%s.%s", argv[4], "glfs-client", argv[5], "log"); + fs = setup_client(hostname, volname, log_file); + if (!fs) { + LOG_ERR("setup_client", errno); + return -1; + } + + ret = acquire_mandatory_lock(fs, fname); + +error: + if (fs) { + /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program + } + + fclose(logfile_fp); + + return ret; +} 
diff --git a/tests/basic/fencing/afr-lock-heal-basic.t b/tests/basic/fencing/afr-lock-heal-basic.t
new file mode 100644
index 00000000000..5ac05c7aec6
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.t
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Echo "Y" if `ps -p` finds the given pid, "N" otherwise.
+function is_gfapi_program_alive()
+{
+    pid=$1
+    ps -p $pid
+    if [ $? -eq 0 ]
+    then
+        echo "Y"
+    else
+        echo "N"
+    fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Replica-3 volume with mandatory locking enforced.
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-basic.c -lgfapi -ggdb
+
+# Start two clients (C1, C2) on the same file in the background.
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C1&
+client1_pid=$!
+TEST [ $client1_pid ]
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C2&
+client2_pid=$!
+TEST [ $client2_pid ]
+
+TEST sleep 5 # By now, the 2 clients would have opened an fd on FILE and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+# Kill brick-3 and let client-1 take lock on the file.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client1_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client1_pid + +# Check lock is present on brick-1 and brick-2 +b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0) +b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1) +c1_lock_on_b1="$(grep ACTIVE $b1_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +c1_lock_on_b2="$(grep ACTIVE $b2_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b2" ] + +# Restart brick-3 and check that the lock has healed on it. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal. + +b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2) +c1_lock_on_b3="$(grep ACTIVE $b3_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b3" ] + +# Kill brick-1 and let client-2 preempt the lock on bricks 2 and 3. +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill -SIGUSR1 $client2_pid +# If program is still alive, glfs_file_lock() was a success. +EXPECT "Y" is_gfapi_program_alive $client2_pid + +# Restart brick-1 and let lock healing complete. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal. + +# Check that all bricks now have locks from client 2 only. 
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0) +b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1) +b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2) +c2_lock_on_b1="$(grep ACTIVE $b1_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +c2_lock_on_b2="$(grep ACTIVE $b2_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +c2_lock_on_b3="$(grep ACTIVE $b3_sdump| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')" +TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b2" ] +TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b3" ] +TEST [ "$c2_lock_on_b1" != "$c1_lock_on_b1" ] + +#Let the client programs run and exit. +TEST kill -SIGUSR1 $client1_pid +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client1_pid +TEST kill -SIGUSR1 $client2_pid +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client2_pid + +cleanup_tester $(dirname $0)/afr-lock-heal-basic +cleanup; -- cgit