diff options
-rw-r--r-- | tests/basic/afr/split-brain-healing.t | 36 | ||||
-rw-r--r-- | tests/utils/get-mdata-xattr.c | 152 | ||||
-rw-r--r-- | tests/volume.rc | 30 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-metadata.c | 21 |
4 files changed, 223 insertions, 16 deletions
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index c80f900b909..78553e6c845 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -20,11 +20,14 @@ function get_replicate_subvol_number { cleanup; AREQUAL_PATH=$(dirname $0)/../../utils +GET_MDATA_PATH=$(dirname $0)/../../utils CFLAGS="" test "`uname -s`" != "Linux" && { CFLAGS="$CFLAGS -lintl"; } build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} @@ -152,13 +155,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4 subvolume=$(get_replicate_subvol_number file5) if [ $subvolume == 0 ] then - mtime1=$(stat -c %Y $B0/${V0}1/file5) - mtime2=$(stat -c %Y $B0/${V0}2/file5) + mtime1=$(get_mtime $B0/${V0}1/file5) + mtime2=$(get_mtime $B0/${V0}2/file5) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) elif [ $subvolume == 1 ] then - mtime1=$(stat -c %Y $B0/${V0}3/file5) - mtime2=$(stat -c %Y $B0/${V0}4/file5) + mtime1=$(get_mtime $B0/${V0}3/file5) + mtime2=$(get_mtime $B0/${V0}4/file5) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) fi $CLI volume heal $V0 split-brain latest-mtime /file5 @@ -166,12 +169,12 @@ EXPECT "0" echo $? if [ $subvolume == 0 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5) - mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5) + mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) elif [ $subvolume == 1 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5) - mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5) + mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) fi #TODO: To below comparisons on full sub-second resolution @@ -188,14 +191,14 @@ subvolume=$(get_replicate_subvol_number file6) if [ $subvolume == 0 ] then GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) - mtime1=$(stat -c %Y $B0/${V0}1/file6) - mtime2=$(stat -c %Y $B0/${V0}2/file6) + mtime1=$(get_mtime $B0/${V0}1/file6) + mtime2=$(get_mtime $B0/${V0}2/file6) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) elif [ $subvolume == 1 ] then GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) - mtime1=$(stat -c %Y $B0/${V0}3/file6) - mtime2=$(stat -c %Y $B0/${V0}4/file6) + mtime1=$(get_mtime $B0/${V0}3/file6) + mtime2=$(get_mtime $B0/${V0}4/file6) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) fi GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" @@ -204,12 +207,12 @@ EXPECT "0" echo $? if [ $subvolume == 0 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6) - mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6) + mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) elif [ $subvolume == 1 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6) - mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6) + mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) fi #TODO: To below comparisons on full sub-second resolution @@ -253,4 +256,5 @@ EXPECT "1" echo $? cd - TEST rm $AREQUAL_PATH/arequal-checksum +TEST rm $GET_MDATA_PATH/get-mdata-xattr cleanup diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c new file mode 100644 index 00000000000..e9f54717263 --- /dev/null +++ b/tests/utils/get-mdata-xattr.c @@ -0,0 +1,152 @@ +/* + Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdlib.h> +#include <endian.h> +#include <stdio.h> +#include <time.h> +#include <string.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <errno.h> + +typedef struct gf_timespec_disk { + uint64_t tv_sec; + uint64_t tv_nsec; +} gf_timespec_disk_t; + +/* posix_mdata_t on disk structure */ +typedef struct __attribute__((__packed__)) posix_mdata_disk { + /* version of structure, bumped up if any new member is added */ + uint8_t version; + /* flags indicates valid fields in the structure */ + uint64_t flags; + gf_timespec_disk_t ctime; + gf_timespec_disk_t mtime; + gf_timespec_disk_t atime; +} posix_mdata_disk_t; + +/* In memory representation posix metadata xattr */ +typedef struct { + /* version of structure, bumped up if any new member is added */ + uint8_t version; + /* flags indicates valid fields in the structure */ + uint64_t flags; + struct timespec ctime; + struct timespec mtime; + struct timespec atime; +} posix_mdata_t; + +#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata" + +/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order + */ +static inline void +posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) +{ + out->version = in->version; + out->flags = be64toh(in->flags); + + out->ctime.tv_sec = be64toh(in->ctime.tv_sec); + out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); + + out->mtime.tv_sec = be64toh(in->mtime.tv_sec); + out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); + + out->atime.tv_sec = be64toh(in->atime.tv_sec); + out->atime.tv_nsec = be64toh(in->atime.tv_nsec); +} + +/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ +static int +posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata) +{ + size_t size = -1; + char *value = NULL; + char gfid_str[64] = {0}; + + char *key = GF_XATTR_MDATA_KEY; + + if (!metadata || !real_path) { + goto err; + } + + /* Get size */ + size = lgetxattr(real_path, key, NULL, 0); + if (size == -1) { + goto err; + } + + value = calloc(size + 1, sizeof(char)); + if (!value) { + goto err; + } + + /* Get xattr value */ + size = lgetxattr(real_path, key, value, size); + if (size == -1) { + goto err; + } + posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); + +out: + if (value) + free(value); + return 0; +err: + if (value) + free(value); + return -1; +} + +int +main(int argc, char *argv[]) +{ + posix_mdata_t metadata; + uint64_t result; + + if (argc != 3) { + /* + Usage: get_mdata_xattr -c|-m|-a <file-name> + where -c --> ctime + -m --> mtime + -a --> atime + */ + printf("-1"); + goto err; + } + + if (posix_fetch_mdata_xattr(argv[2], &metadata)) { + printf("-1"); + goto err; + } + + switch (argv[1][1]) { + case 'c': + result = metadata.ctime.tv_sec; + break; + case 'm': + result = metadata.mtime.tv_sec; + break; + case 'a': + result = metadata.atime.tv_sec; + break; + default: + printf("-1"); + goto err; + } + printf("%" PRIu64, result); + fflush(stdout); + return 0; +err: + fflush(stdout); + return -1; +} diff --git a/tests/volume.rc b/tests/volume.rc index 6477dfb484f..85a2cb01727 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -944,3 +944,33 @@ function number_healer_threads_shd { local pid=$(get_shd_mux_pid $1) pstack $pid | grep $2 | wc -l } + +function get_mtime { + local time=$(get-mdata-xattr -m $1) + if [ $time == "-1" ]; + then + echo $(stat -c %Y $1) + else + echo $time + fi +} + +function get_ctime { + local time=$(get-mdata-xattr -c $1) + if [ $time == "-1" ]; + then + echo $(stat -c %Z $2) + else + echo $time + fi +} + +function get_atime { + local time=$(get-mdata-xattr -a $1) + if [ $time == "-1" ]; + then + echo $(stat -c %X $1) + else + echo $time + fi +} diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index e96f222cff4..5a5e6cd744e 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -416,6 +416,22 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, * still fine as the times would get eventually * accurate. */ + + /* Don't create xattr with utimes/utimensat, only update if + * present. This otherwise causes issues during inservice + * upgrade. It causes inconsistent xattr values with in replica + * set. The scenario happens during upgrade where clients are + * older versions (without the ctime feature) and the server is + * upgraded to the new version (with the ctime feature which + * is enabled by default). + */ + + if (update_utime) { + UNLOCK(&inode->lock); + GF_FREE(mdata); + return 0; + } + mdata->version = 1; mdata->flags = 0; mdata->ctime.tv_sec = time->tv_sec; @@ -527,6 +543,11 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, priv = this->private; + /* NOTE: + * This routine (utimes) is intentionally allowed for all internal and + * external clients even if ctime is not set. This is because AFR and + * WORM uses time attributes for it's internal operations + */ if (inode && priv->ctime) { if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { tv.tv_sec = stbuf->ia_atime; |