diff options
25 files changed, 1814 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore index 849a0f1a8be..fc5ba586f8e 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,5 @@ xlators/features/cloudsync/src/cloudsync-autogen-fops.c xlators/features/cloudsync/src/cloudsync-autogen-fops.h xlators/features/utime/src/utime-autogen-fops.c xlators/features/utime/src/utime-autogen-fops.h +tests/basic/metadisp/ftruncate +xlators/features/metadisp/src/fops.c diff --git a/configure.ac b/configure.ac index 13d439e0f93..cf22267aada 100644 --- a/configure.ac +++ b/configure.ac @@ -164,6 +164,8 @@ AC_CONFIG_FILES([Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile + xlators/features/metadisp/Makefile + xlators/features/metadisp/src/Makefile xlators/playground/Makefile xlators/playground/template/Makefile xlators/playground/template/src/Makefile @@ -810,6 +812,17 @@ fi AC_SUBST(GEOREP_EXTRAS_SUBDIR) AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno") +# METADISP section +AC_ARG_ENABLE([metadisp], + AC_HELP_STRING([--enable-metadisp], + [Enable the metadata dispersal xlator])) +BUILD_METADISP=no +if test "x${enable_metadisp}" = "xyes"; then + BUILD_METADISP=yes +fi +AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"]) +# end METADISP section + # Events section AC_ARG_ENABLE([events], AC_HELP_STRING([--disable-events], @@ -1675,6 +1688,7 @@ echo "IPV6 default : $with_ipv6_default" echo "Use TIRPC : $with_libtirpc" echo "With Python : ${PYTHON_VERSION}" echo "Cloudsync : $BUILD_CLOUDSYNC" +echo "Metadata dispersal : $BUILD_METADISP" echo "Link with TCMALLOC : $BUILD_TCMALLOC" echo diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c new file mode 100644 index 00000000000..62b532b9ce4 --- /dev/null +++ b/tests/basic/metadisp/fsyncdir.c @@ -0,0 +1,29 @@ +#include <stdio.h> +#include <stdlib.h> +#include 
/*
 * ftruncate test helper.
 *
 * Usage: ftruncate <file>
 *
 * Opens <file> read/write, truncates it to zero length through the open
 * descriptor, writes the five bytes "hello" and fsyncs. Exits with
 * EXIT_SUCCESS only if every step succeeds; otherwise prints the failing
 * call via perror()/stderr and exits with EXIT_FAILURE.
 */
int
main(int argc, char **argv)
{
    static const char payload[] = "hello";
    const size_t payload_len = sizeof(payload) - 1; /* no trailing NUL */
    ssize_t written;
    int rc = EXIT_FAILURE;
    int pfd;

    /* argv[1] is dereferenced below, so make sure it exists. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n",
                (argc > 0 && argv[0]) ? argv[0] : "ftruncate");
        return EXIT_FAILURE;
    }

    pfd = open(argv[1], O_RDWR);
    if (pfd == (-1)) {
        perror("open");
        return EXIT_FAILURE;
    }

    if (ftruncate(pfd, 0) == (-1)) {
        perror("ftruncate");
        goto out;
    }

    written = write(pfd, payload, payload_len);
    if (written == (-1)) {
        perror("write");
        goto out;
    }
    /* write() may legally store fewer bytes than requested. */
    if ((size_t)written != payload_len) {
        fprintf(stderr, "write: short write (%zd of %zu bytes)\n", written,
                payload_len);
        goto out;
    }

    if (fsync(pfd) == (-1)) {
        perror("fsync");
        goto out;
    }

    rc = EXIT_SUCCESS;

out:
    /* Release the descriptor on every path; a failed close is an error. */
    if (close(pfd) == (-1)) {
        perror("close");
        rc = EXIT_FAILURE;
    }
    return rc;
}
fprintf(stderr, "data mismatch\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int +do_set(int argc, char **argv, int fd) +{ + char *value; + int ret; + + CONSUME(value); + + ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0); + if (ret == (-1)) { + perror("fsetxattr"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int +do_remove(int argc, char **argv, int fd) +{ + int ret; + + ret = fremovexattr(fd, MY_XATTR); + if (ret == (-1)) { + perror("femovexattr"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +int +main(int argc, char **argv) +{ + int fd; + char *path; + char *cmd; + + CONSUME(PROGRAM); + CONSUME(path); + CONSUME(cmd); + + fd = open(path, O_RDWR); + if (fd == (-1)) { + perror("open"); + return EXIT_FAILURE; + } + + if (strcmp(cmd, "get") == 0) { + return do_get(argc, argv, fd); + } + + if (strcmp(cmd, "set") == 0) { + return do_set(argc, argv, fd); + } + + if (strcmp(cmd, "remove") == 0) { + return do_remove(argc, argv, fd); + } + + return EXIT_SUCCESS; +} diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c new file mode 100644 index 00000000000..63578bc528f --- /dev/null +++ b/tests/basic/metadisp/gfs-fsetxattr.c @@ -0,0 +1,141 @@ +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int gfapi = 1; + +int +main(int argc, char *argv[]) +{ + glfs_t *fs = NULL; + int ret = 0; + int i = 0; + glfs_fd_t *fd = NULL; + char *topdir = "topdir", *filename = "file1"; + char *buf = NULL; + char *logfile = NULL; + char *hostname = NULL; + char *basename = NULL; + char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL; + struct stat sb = { + 0, + }; + + if (argc != 5) { + fprintf( + stderr, + "Expect following args %s <hostname> <Vol> <log file> <basename>\n", + argv[0]); + return -1; + } + + hostname = argv[1]; + logfile = argv[3]; + basename = argv[4]; + + 
fs = glfs_new(argv[2]); + if (!fs) { + fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno)); + return -1; + } + + ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_set_logging(fs, logfile, 7); + if (ret < 0) { + fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_init(fs); + if (ret < 0) { + fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = asprintf(&dir1, "%s-dir", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + ret = glfs_mkdir(fs, dir1, 0755); + if (ret < 0) { + fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno)); + return -1; + } + + fd = glfs_opendir(fs, dir1); + if (!fd) { + fprintf(stderr, "/: %s\n", strerror(errno)); + return -1; + } + + ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno)); + return -1; + } + + ret = glfs_closedir(fd); + if (ret < 0) { + fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = asprintf(&filename1, "%s-file", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + ret = asprintf(&filename2, "%s-file-renamed", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + fd = glfs_creat(fs, filename1, O_RDWR, 0644); + if (!fd) { + fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno)); + return -1; + } + + ret = glfs_rename(fs, filename1, filename2); + if (ret < 0) { + fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = 
glfs_lstat(fs, filename2, &sb); + if (ret < 0) { + fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno)); + return -1; + } + + ret = glfs_close(fd); + if (ret < 0) { + fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } +} diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t new file mode 100644 index 00000000000..894ffe07226 --- /dev/null +++ b/tests/basic/metadisp/metadisp.t @@ -0,0 +1,316 @@ +#!/usr/bin/env bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + + +# Considering `--enable-metadisp` is an option for `./configure`, +# which is disabled by default, this test will never pass regression. +# But to see the value of this test, run below after configuring +# with above option : +# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t` + +#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST + +cleanup; + +TEST mkdir -p $B0/b0/{0,1} + +TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0 +TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1 + +TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0; + +NUM_FILES=40 +TEST touch $M0/{1..${NUM_FILES}} + +# each drive should get 40 files +TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ] +TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ] + +# now write some data to a file +echo "hello" > $M0/3 +filename=$$ +echo "hello" > /tmp/metadisp-write-${filename} +checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}') +TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ] + +# check that the backend file exists on b1 +gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3)) +TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) 
-eq 1 ] + +# check that the backend file matches the frontend +TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ] + +# delete the file +TEST rm $M0/3 + +# ensure the frontend and backend files are cleaned up +TEST ! -e $M0/3 +TEST ! [ stat $B0/b*/*/$gfid ] + +# Test TRUNCATE + WRITE flow +echo "hello" | tee $M0/4 +echo "goo" | tee $M0/4 +filename=$$ +echo "goo" | tee /tmp/metadisp-truncate-${filename} +checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}') +TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ] + +# Test mkdir + rmdir. +TEST mkdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +TEST rmdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] + +# Test rename. +TEST touch $M0/rename_me +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +TEST mv $M0/rename_me $M0/such_rename +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] + +# Test rename of a file that doesn't exist. +TEST ! mv $M0/does-not-exist $M0/neither-does-this + + +# cleanup all the other files. 
+TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}} +TEST rm $M0/such_rename +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ] + +# Test CREATE flow +NUM_FILES=40 +TEST touch $M0/{1..${NUM_FILES}} +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ] + +# Test UNLINK flow +# No drives should have any files +TEST rm -v $M0/{1..${NUM_FILES}} +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ] + +# Test CREATE + WRITE + READ flow +filename=$$ +dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10 +checksum=$(md5sum /tmp/${filename} | awk '{print $1}') +TEST cp -v /tmp/${filename} $M0/1 +TEST cp -v /tmp/${filename} $M0/2 +TEST cp -v /tmp/${filename} $M0/3 +TEST cp -v /tmp/${filename} $M0/4 +TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ] + +# Test TRUNCATE + WRITE flow +TEST dd if=/dev/zero of=$M0/1 bs=1M count=20 + +# Check that readdir stats the files properly and we get the correct sizes +TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ]; + +# Test mkdir + rmdir. +TEST mkdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +TEST rmdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] + +# Test rename. +# Still flaky, so disabled until it can be debugged. 
# _test_hardlink SUFFIX
#
# Creates $M0/hardsrc<SUFFIX> and hard-links it to $M0/harddst<SUFFIX>, then
# inspects both backend directories ($B0/b0/0 and $B0/b0/1):
#   - source and destination must either both exist or both be absent in a
#     given backend directory, otherwise "MISSING ..." is printed;
#   - where both exist they must share an inode number, otherwise
#     "MISMATCH ..." is printed.
# Prints "OK" when every backend directory passes.
_test_hardlink () {
    local b
    local has_src
    local has_dst
    local src_inum
    local dst_inum
    touch $M0/hardsrc$1
    ln $M0/hardsrc$1 $M0/harddst$1
    # NOTE: this used to read b{0}; a single-element brace list is not
    # expanded by bash, so the loop visited the non-existent literal
    # directory "b{0}" and the check always printed OK.
    for b in $B0/b0/{0,1}; do
        [ -f $b/hardsrc$1 ]; has_src=$?
        [ -f $b/harddst$1 ]; has_dst=$?
        if [ "$has_src" != "$has_dst" ]; then
            echo "MISSING $b/hardxxx$1 $has_src $has_dst"
            return
        fi
        # "00" means both [ -f ] tests succeeded, i.e. both names exist here.
        if [ "$has_src$has_dst" = "00" ]; then
            src_inum=$(stat -c '%i' $b/hardsrc$1)
            dst_inum=$(stat -c '%i' $b/harddst$1)
            if [ "$dst_inum" != "$src_inum" ]; then
                # report the suffix we were called with ($1); $i is not set here
                echo "MISMATCH $b/hardxxx$1 $src_inum $dst_inum"
                return
            fi
        fi
    done
    echo "OK"
}
+#compare_fields () { +# val1=$(df $1 | grep / | awk "{print \$$3}") +# val2=$(df $2 | grep / | awk "{print \$$3}") +# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ] +#} + +#brick_df=$(df $B0 | grep /) +#mount_df=$(df $M0 | grep /) +#TEST compare_fields $B0 $M0 2 # Total blocks +#TEST compare_fields $B0 $M0 3 # Used +#TEST compare_fields $B0 $M0 4 # Available + +# Test removexattr. +#RXATTR_FILE=$(get_file_not_on_disk0 rxtest) +#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE +#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE +#TEST setfattr -x user.foo $M0/$RXATTR_FILE +#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE + +# Test fsyncdir. We can't really test whether it's doing the right thing, +# but we can test that it doesn't fail and we can hand-check that it's calling +# down to all of the disks instead of just one. +# +# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if +# other translators are handling it correctly? + +#FSYNCDIR_EXE=$(dirname $0)/fsyncdir +#build_tester ${FSYNCDIR_EXE}.c +#TEST touch $M0/fsyncdir_src +#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst +#TEST rm -f $FSYNCDIR_EXE + +# Test fsetxattr, fgetxattr, fremovexattr (in that order). +FXATTR_FILE=$M0/fxfile1 +TEST touch $FXATTR_FILE +FXATTR_EXE=$(dirname $0)/fxattr +build_tester ${FXATTR_EXE}.c +TEST ! getfattr -n user.fxtest $FXATTR_FILE +TEST $FXATTR_EXE $FXATTR_FILE set value1 +TEST getfattr -n user.fxtest $FXATTR_FILE +TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE +TEST $FXATTR_EXE $FXATTR_FILE get value2 +TEST $FXATTR_EXE $FXATTR_FILE remove +TEST ! 
getfattr -n user.fxtest $FXATTR_FILE +TEST rm -f $FXATTR_EXE + +# Test ftruncate +FTRUNCATE_EXE=$(dirname $0)/ftruncate +build_tester ${FTRUNCATE_EXE}.c +FTRUNCATE_FILE=$M0/ftfile1 +TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB +TEST $FTRUNCATE_EXE $FTRUNCATE_FILE +#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1)) + +# Test fallocate, discard, zerofill. Actually we don't so much check that these +# *work* as that they don't throw any errors (especially ENOENT because the +# file's not on disk zero). +FALLOC_FILE=fatest1 +TEST touch $M0/$FALLOC_FILE +TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE +TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE +# This actually fails with "operation not supported" on most filesystems, so +# don't leave it enabled except to test changes. +#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE + +#cleanup; diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol new file mode 100644 index 00000000000..58ae2f6f2a8 --- /dev/null +++ b/tests/basic/metadisp/metadisp.vol @@ -0,0 +1,14 @@ +volume posix-0 + type storage/posix + option directory /d/backends/b0/0 +end-volume + +volume posix-1 + type storage/posix + option directory /d/backends/b0/1 +end-volume + +volume metadisp-0 + type features/metadisp + subvolumes posix-0 posix-1 +end-volume diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 194634b003d..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC CLOUDSYNC_DIR = cloudsync endif +if BUILD_METADISP + METADISP_DIR = metadisp +endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ compress changelog gfid-access snapview-client snapview-server trash \ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ - utime + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/metadisp/Makefile.am 
b/xlators/features/metadisp/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/metadisp/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * + * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. 
+ */ + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char gfid_buf[GFID_STR_LEN + 1] = { + 0, + }; + char *path = NULL; + + GF_VALIDATE_OR_GOTO("metadisp", src_loc, out); + GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out); + + loc_copy(dst_loc, src_loc); + memcpy(dst_loc->pargfid, root, sizeof(root)); + GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke + // whatever loc_copy gave us + + uuid_utoa_r(gfid, gfid_buf); + + path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char), + gf_common_mt_char); // freed via loc_wipe + + path[0] = '/'; + strncpy(path + 1, gfid_buf, GFID_STR_LEN); + path[GFID_STR_LEN] = 0; + dst_loc->path = path; + if (src_loc->name) + dst_loc->name = strrchr(dst_loc->path, '/'); + if (dst_loc->name) + dst_loc->name++; + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c new file mode 100644 index 00000000000..4385b7dd5b7 --- /dev/null +++ b/xlators/features/metadisp/src/fops-tmpl.c @@ -0,0 +1,10 @@ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/xlator.h> +#include "metadisp.h" +#include "metadisp-fops.h" + +#pragma generate diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py new file mode 100644 index 00000000000..8b5e120fdec --- /dev/null +++ b/xlators/features/metadisp/src/gen-fops.py @@ -0,0 +1,160 @@ +#!/usr/bin/python + +import sys +from generator import fop_subs, generate + +FN_METADATA_CHILD_GENERIC = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ metadata"); + STACK_WIND (frame, default_@NAME@_cbk, + METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_GENERIC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) 
def gen_fops():
    """Emit the generated part of fops.c on stdout.

    For every generated fop, prints the function produced by expanding the
    matching template, then prints the ``struct xlator_fops fops`` table.
    The table lists the names in ``skipped`` first (no body is generated
    for those here), followed by every generated fop in emission order.
    """
    # Start from a copy so the module-level ``skipped`` list is not mutated.
    table_entries = list(skipped)

    # (template, fop names) pairs, expanded strictly in this order.
    emission_plan = (
        # Fops that wind to the DATA_CHILD.
        # NOTE: re-written in order from google doc:
        # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
        (
            FN_DATAFD_TEMPLATE,
            (
                "writev",
                "readv",
                "ftruncate",
                "zerofill",
                "discard",
                "seek",
                "fstat",
            ),
        ),
        (FN_DATALOC_TEMPLATE, ("truncate",)),
        # Fops that operate solely on dentries, folders, or extended
        # attributes: they always wind to METADATA_CHILD and never perform
        # any path rewriting. readdir, readdirp and setattr are
        # special-cased and therefore not listed here.
        # TODO: the xattr fops should wind to both children -- data for
        # backend-attributes and metadata for the rest.
        (
            FN_METADATA_CHILD_GENERIC,
            (
                "mkdir",
                "symlink",
                "link",
                "rename",
                "mknod",
                "opendir",
                "fsyncdir",
                "readlink",
                "fentrylk",
                "access",
                "xattrop",
                "setxattr",
                "getxattr",
                "removexattr",
                "fgetxattr",
                "fsetxattr",
                "fremovexattr",
            ),
        ),
    )

    for template, fop_names in emission_plan:
        for fop_name in fop_names:
            table_entries.append(fop_name)
            print(generate(template, fop_name, fop_subs))

    print("struct xlator_fops fops = {")
    for fop_name in table_entries:
        print(generate(FOPS_LINE_TEMPLATE, fop_name, fop_subs))

    print("};")
+ */ + +int32_t +metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + // create the backend data inode + STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +int32_t +metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = cookie; + if (op_ret != 0) { + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; + } + + if (stub == NULL) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + return 0; + } + + call_resume(stub); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("."); + + loc_t backend_loc = { + 0, + }; + call_stub_t *stub = NULL; + uuid_t *gfid_req = NULL; + + RESOLVE_GFID_REQ(xdata, gfid_req, out); + + if (build_backend_loc(*gfid_req, loc, &backend_loc)) { + goto unwind; + } + + frame->local = loc; + + stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags, + mode, umask, fd, xdata); + + STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, 
METADATA_CHILD(this), + METADATA_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h new file mode 100644 index 00000000000..56dd427cf34 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fops.h @@ -0,0 +1,51 @@ +#ifndef GF_METADISP_FOPS_H_ +#define GF_METADISP_FOPS_H_ + +#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> +#include <glusterfs/glusterfs.h> + +#include <sys/types.h> + +/* fops in here are defined in their own file. Every other fop is just defined + * inline of fops.c */ + +int +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); + +int +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +#endif diff --git a/xlators/features/metadisp/src/metadisp-fsync.c 
b/xlators/features/metadisp/src/metadisp-fsync.c new file mode 100644 index 00000000000..2e46fa84eac --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fsync.c @@ -0,0 +1,54 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} + +int32_t +metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata); + STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c new file mode 100644 index 00000000000..27d90c9f746 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-lookup.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Lookup, like stat, is a two-step process for grabbing the metadata details + * as well as the data details. 
+ */ + +int32_t +metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + METADISP_TRACE("backend_lookup_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +} + +int32_t +metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("backend_lookup_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->lookup, &backend_loc, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + + // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + METADISP_TRACE("lookup"); + call_stub_t *stub = NULL; + stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata); + STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, 
stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c new file mode 100644 index 00000000000..64814afe636 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-open.c @@ -0,0 +1,70 @@ +#include <glusterfs/call-stub.h> +#include "metadisp.h" + +int32_t +metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("got open results %d %d", op_ret, op_errno); + + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (!stub) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this), + DATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd, + xdata); + STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c new file mode 100644 index 
00000000000..5f840b1e88f --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-readdir.c @@ -0,0 +1,65 @@ +#include "metadisp.h" + +/** + * With a change to the posix xlator, readdir and readdirp are shockingly + * simple. + * + * The issue with separating the backend data of the files + * with the metadata is that readdirs must now read from multiple sources + * to coalesce the directory entries. + * + * The way we do this is to tell the METADATA_CHILD that when it's + * running readdirp, each file entry should have a stat wound to + * 'stat-source-of-truth'. + * + * see metadisp_stat for how it handles winds _from_posix. + */ + +int32_t +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + /* + * Always use readdirp, even if the original was readdir. Why? Because NFS. + * There are multiple translations between Gluster, UNIX, and NFS stat + * structures in that path. One of them uses the type etc. from the stat + * structure, which is only filled in by readdirp. If we use readdir, the + * entries do actually go all the way back to the client and are visible in + * getdents, but then the readdir throws them away because of the + * uninitialized type. + */ + GF_UNUSED int32_t ret; + if (!xdata) { + xdata = dict_new(); + } + + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +} + +int32_t +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + if (!xdata) { + xdata = dict_new(); + } + GF_UNUSED int32_t ret; + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! 
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c new file mode 100644 index 00000000000..6991cf644f3 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-setattr.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid, + xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(statpost->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + call_resume(stub); + return 0; 
+unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + METADISP_TRACE("setattr"); + call_stub_t *stub = NULL; + stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf, + valid, xdata); + STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c new file mode 100644 index 00000000000..b06d0dbcddd --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-stat.c @@ -0,0 +1,124 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The stat flow in METADISP is complicated because we must + * do ensure a few things: + * 1. stat, on the path within the metadata layer, + * MUST get the backend FD of the data layer. + * --- we wind to the metadata layer, then the data layer. + * + * 2. the metadata layer MUST be able to ask the data + * layer for stat information. + * --- this is 'syncop-internal-from-posix' + * + * 3. when the metadata exists BUT the data is missing, + * we MUST mark the backend file as bad and heal it. 
+ */ + +int32_t +metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno); + if (op_errno == ENOENT) { + STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL); + return 0; + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("winding stat to path %s", loc->path); + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("bad object, sending EUCLEAN"); + STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL); + return 0; + } + + STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + METADISP_TRACE("got stat results %d %d", op_ret, op_errno); + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // only use the stub for the files + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t ret = 0; + loc_t backend_loc = { + 0, + }; + METADISP_FILTER_ROOT(stat, loc, xdata); + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) { + // if we've just been sent a stat from posix, then we know + // that we must send 
down a stat for a file to the second child. + // + // that means we can skip the stat for the first child and just + // send to the data disk. + METADISP_TRACE("got syncop-internal-from-posix"); + STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->stat, &backend_loc, xdata); + return 0; + } + + // we do not know if the request is for a file, folder, etc. wind + // to first child to find out. + stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata); + METADISP_TRACE("winding stat to first child %s", loc->path); + STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->stat, loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c new file mode 100644 index 00000000000..1f6a8eb35ce --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-unlink.c @@ -0,0 +1,160 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The unlink flow in metadisp is complicated because we must + * do ensure that UNLINK causes both the metadata objects + * to get removed and the data objects to get removed. + */ + +int32_t +metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) +{ + METADISP_TRACE("winding backend unlink to path %s", loc->path); + STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE(". 
%d %d", op_ret, op_errno); + + int ret = 0; + call_stub_t *stub = NULL; + int nlink = 0; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret != 0) { + op_errno = EINVAL; + op_ret = -1; + goto unwind; + } + METADISP_TRACE("frontend hardlink count %d %d", ret, nlink); + if (nlink > 1) { + goto unwind; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + call_stub_t *stub = NULL; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // fail fast on empty gfid so we don't loop forever + if (gf_uuid_is_null(buf->ia_gfid)) { + op_ret = -1; + op_errno = ENODATA; + goto unwind; + } + + // fill gfid since the stub is incomplete + memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t)); + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("winding lookup for unlink to path %s", loc->path); + + // loop back to ourselves after a lookup + stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata); + 
STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub, + METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + // + // ensure we get the link count on the unlink response, so we can + // account for hardlinks before winding to the backend. + // NOTE: + // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation + // is needed to ensure that multiple requests will work in the same + // xlator stack. + // + if (!xdata) { + xdata = dict_new(); + } + dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + + METADISP_TRACE("winding frontend unlink to path %s", loc->path); + stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag, + xdata); + + STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c new file mode 100644 index 00000000000..3c8f150cebc --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.c @@ -0,0 +1,46 @@ +#include <glusterfs/call-stub.h> + +#include "metadisp.h" +#include "metadisp-fops.h" + +int32_t +init(xlator_t *this) +{ + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with children. exiting"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +/* defined in fops.c */ +struct xlator_fops fops; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .fops = &fops, + .cbks = &cbks, + .options = options, + .op_version = {1}, + .identifier = "metadisp", + .category = GF_EXPERIMENTAL, +}; diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h new file mode 100644 index 00000000000..c8fd7a13c04 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef GF_METADISP_H_ +#define GF_METADISP_H_ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#define METADATA_CHILD(_this) FIRST_CHILD(_this) +#define DATA_CHILD(_this) SECOND_CHILD(_this) + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc); + +#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args) + +#define METADISP_FILTER_ROOT(_op, _args...) \ + if (strcmp(loc->path, "/") == 0) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) 
\ + if (__is_root_gfid(_gfid)) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \ + VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \ + _lbl) + +#endif /* __TEMPLATE_H__ */ |