diff options
| -rw-r--r-- | tests/basic/afr/split-brain-healing.t | 36 | ||||
| -rw-r--r-- | tests/utils/get-mdata-xattr.c | 152 | ||||
| -rw-r--r-- | tests/volume.rc | 30 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-metadata.c | 21 | 
4 files changed, 223 insertions, 16 deletions
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index c80f900b909..78553e6c845 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -20,11 +20,14 @@ function get_replicate_subvol_number {  cleanup;  AREQUAL_PATH=$(dirname $0)/../../utils +GET_MDATA_PATH=$(dirname $0)/../../utils  CFLAGS=""  test "`uname -s`" != "Linux" && {      CFLAGS="$CFLAGS -lintl";  }  build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS +build_tester $GET_MDATA_PATH/get-mdata-xattr.c +  TEST glusterd  TEST pidof glusterd  TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} @@ -152,13 +155,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4  subvolume=$(get_replicate_subvol_number file5)  if [ $subvolume == 0 ]  then -        mtime1=$(stat -c %Y $B0/${V0}1/file5) -        mtime2=$(stat -c %Y $B0/${V0}2/file5) +        mtime1=$(get_mtime $B0/${V0}1/file5) +        mtime2=$(get_mtime $B0/${V0}2/file5)          LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))  elif [ $subvolume == 1 ]  then -        mtime1=$(stat -c %Y $B0/${V0}3/file5) -        mtime2=$(stat -c %Y $B0/${V0}4/file5) +        mtime1=$(get_mtime $B0/${V0}3/file5) +        mtime2=$(get_mtime $B0/${V0}4/file5)          LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))  fi  $CLI volume heal $V0 split-brain latest-mtime /file5 @@ -166,12 +169,12 @@ EXPECT "0" echo $?  if [ $subvolume == 0 ]  then -        mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5) -        mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5) +        mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) +        mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)  elif [ $subvolume == 1 ]  then -        mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5) -        mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5) +        mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) +        mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)  fi  #TODO: To below comparisons on full sub-second resolution @@ -188,14 +191,14 @@ subvolume=$(get_replicate_subvol_number file6)  if [ $subvolume == 0 ]  then          GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) -        mtime1=$(stat -c %Y $B0/${V0}1/file6) -        mtime2=$(stat -c %Y $B0/${V0}2/file6) +        mtime1=$(get_mtime $B0/${V0}1/file6) +        mtime2=$(get_mtime $B0/${V0}2/file6)          LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))  elif [ $subvolume == 1 ]  then          GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) -        mtime1=$(stat -c %Y $B0/${V0}3/file6) -        mtime2=$(stat -c %Y $B0/${V0}4/file6) +        mtime1=$(get_mtime $B0/${V0}3/file6) +        mtime2=$(get_mtime $B0/${V0}4/file6)          LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))  fi  GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" @@ -204,12 +207,12 @@ EXPECT "0" echo $?  if [ $subvolume == 0 ]  then -        mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6) -        mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6) +        mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) +        mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)  elif [ $subvolume == 1 ]  then -        mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6) -        mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6) +        mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) +        mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)  fi  #TODO: To below comparisons on full sub-second resolution @@ -253,4 +256,5 @@ EXPECT "1" echo $?  cd -  TEST rm $AREQUAL_PATH/arequal-checksum +TEST rm $GET_MDATA_PATH/get-mdata-xattr  cleanup diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c new file mode 100644 index 00000000000..e9f54717263 --- /dev/null +++ b/tests/utils/get-mdata-xattr.c @@ -0,0 +1,152 @@ +/* +   Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com> +   This file is part of GlusterFS. + +   This file is licensed to you under your choice of the GNU Lesser +   General Public License, version 3 or any later version (LGPLv3 or +   later), or the GNU General Public License, version 2 (GPLv2), in all +   cases as published by the Free Software Foundation. +*/ + +#include <stdlib.h> +#include <endian.h> +#include <stdio.h> +#include <time.h> +#include <string.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <errno.h> + +typedef struct gf_timespec_disk { +    uint64_t tv_sec; +    uint64_t tv_nsec; +} gf_timespec_disk_t; + +/* posix_mdata_t on disk structure */ +typedef struct __attribute__((__packed__)) posix_mdata_disk { +    /* version of structure, bumped up if any new member is added */ +    uint8_t version; +    /* flags indicates valid fields in the structure */ +    uint64_t flags; +    gf_timespec_disk_t ctime; +    gf_timespec_disk_t mtime; +    gf_timespec_disk_t atime; +} posix_mdata_disk_t; + +/* In memory representation posix metadata xattr */ +typedef struct { +    /* version of structure, bumped up if any new member is added */ +    uint8_t version; +    /* flags indicates valid fields in the structure */ +    uint64_t flags; +    struct timespec ctime; +    struct timespec mtime; +    struct timespec atime; +} posix_mdata_t; + +#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata" + +/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order + */ +static inline void +posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) +{ +    out->version = in->version; +    out->flags = be64toh(in->flags); + +    out->ctime.tv_sec = be64toh(in->ctime.tv_sec); +    out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); + +    out->mtime.tv_sec = be64toh(in->mtime.tv_sec); +    out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); + +    out->atime.tv_sec = be64toh(in->atime.tv_sec); +    out->atime.tv_nsec = be64toh(in->atime.tv_nsec); +} + +/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ +static int +posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata) +{ +    size_t size = -1; +    char *value = NULL; +    char gfid_str[64] = {0}; + +    char *key = GF_XATTR_MDATA_KEY; + +    if (!metadata || !real_path) { +        goto err; +    } + +    /* Get size */ +    size = lgetxattr(real_path, key, NULL, 0); +    if (size == -1) { +        goto err; +    } + +    value = calloc(size + 1, sizeof(char)); +    if (!value) { +        goto err; +    } + +    /* Get xattr value */ +    size = lgetxattr(real_path, key, value, size); +    if (size == -1) { +        goto err; +    } +    posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); + +out: +    if (value) +        free(value); +    return 0; +err: +    if (value) +        free(value); +    return -1; +} + +int +main(int argc, char *argv[]) +{ +    posix_mdata_t metadata; +    uint64_t result; + +    if (argc != 3) { +        /* +        Usage: get_mdata_xattr -c|-m|-a <file-name> +                       where -c --> ctime +                             -m --> mtime +                             -a --> atime +        */ +        printf("-1"); +        goto err; +    } + +    if (posix_fetch_mdata_xattr(argv[2], &metadata)) { +        printf("-1"); +        goto err; +    } + +    switch (argv[1][1]) { +        case 'c': +            result = metadata.ctime.tv_sec; +            break; +        case 'm': +            result = metadata.mtime.tv_sec; +            break; +        case 'a': +            result = metadata.atime.tv_sec; +            break; +        default: +            printf("-1"); +            goto err; +    } +    printf("%" PRIu64, result); +    fflush(stdout); +    return 0; +err: +    fflush(stdout); +    return -1; +} diff --git a/tests/volume.rc b/tests/volume.rc index 87ca9589305..6ce49768f3f 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -881,3 +881,33 @@ function check_changelog_op {          $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l  } + +function get_mtime { +    local time=$(get-mdata-xattr -m $1) +    if [ $time == "-1" ]; +    then +        echo $(stat -c %Y $1) +    else +        echo $time +    fi +} + +function get_ctime { +    local time=$(get-mdata-xattr -c $1) +    if [ $time == "-1" ]; +    then +        echo $(stat -c %Z $2) +    else +        echo $time +    fi +} + +function get_atime { +    local time=$(get-mdata-xattr -a $1) +    if [ $time == "-1" ]; +    then +        echo $(stat -c %X $1) +    else +        echo $time +    fi +} diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index e96f222cff4..5a5e6cd744e 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -416,6 +416,22 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,                   * still fine as the times would get eventually                   * accurate.                   */ + +                /* Don't create xattr with utimes/utimensat, only update if +                 * present. This otherwise causes issues during inservice +                 * upgrade. It causes inconsistent xattr values with in replica +                 * set. The scenario happens during upgrade where clients are +                 * older versions (without the ctime feature) and the server is +                 * upgraded to the new version (with the ctime feature which +                 * is enabled by default). +                 */ + +                if (update_utime) { +                    UNLOCK(&inode->lock); +                    GF_FREE(mdata); +                    return 0; +                } +                  mdata->version = 1;                  mdata->flags = 0;                  mdata->ctime.tv_sec = time->tv_sec; @@ -527,6 +543,11 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,      priv = this->private; +    /* NOTE: +     * This routine (utimes) is intentionally allowed for all internal and +     * external clients even if ctime is not set. This is because AFR and +     * WORM uses time attributes for it's internal operations +     */      if (inode && priv->ctime) {          if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {              tv.tv_sec = stbuf->ia_atime;  | 
