diff options
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 6 | ||||
| -rw-r--r-- | cli/src/cli-xml-output.c | 8 | ||||
| -rw-r--r-- | cli/src/cli.h | 2 | ||||
| -rw-r--r-- | contrib/mount/mntent.c | 266 | ||||
| -rw-r--r-- | doc/release-notes/3.8.9.md | 27 | ||||
| -rw-r--r-- | tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t | 66 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 32 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 24 | ||||
| -rw-r--r-- | xlators/debug/trace/src/trace.c | 18 | ||||
| -rw-r--r-- | xlators/features/changelog/src/changelog-helpers.c | 31 | ||||
| -rw-r--r-- | xlators/features/upcall/src/upcall-internal.c | 1 | ||||
| -rw-r--r-- | xlators/nfs/server/src/mount3.c | 10 |
14 files changed, 365 insertions, 170 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 0d25279f381..9ff71767ce4 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1977,13 +1977,11 @@ cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status) if (!status->total) goto out; -#ifdef GF_LINUX_HOST_OS memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "brick%d.device", i); ret = dict_get_str (dict, key, &(status->device)); if (ret) status->device = NULL; -#endif memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "brick%d.block_size", i); @@ -1993,7 +1991,6 @@ cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status) status->block_size = 0; } -#ifdef GF_LINUX_HOST_OS memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "brick%d.mnt_options", i); ret = dict_get_str (dict, key, &(status->mount_options)); @@ -2013,7 +2010,6 @@ cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status) ret = dict_get_str (dict, key, &(status->inode_size)); if (ret) status->inode_size = NULL; -#endif /* GF_LINUX_HOST_OS */ memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "brick%d.total_inodes", i); @@ -2051,7 +2047,6 @@ cli_print_detailed_status (cli_volume_status_t *status) cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N'); cli_out ("%-20s : %-20s", "Pid", status->pid_str); -#ifdef GF_LINUX_HOST_OS if (status->fs_name) cli_out ("%-20s : %-20s", "File System", status->fs_name); else @@ -2075,7 +2070,6 @@ cli_print_detailed_status (cli_volume_status_t *status) } else { cli_out ("%-20s : %-20s", "Inode Size", "N/A"); } -#endif if (status->free) cli_out ("%-20s : %-20s", "Disk Space Free", status->free); else diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index dbc8aa7b848..4eafc0b5e68 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -438,12 +438,6 @@ cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict, (xmlChar *)"fsName", "%s", fs_name); - /* inode details are only available for ext 2/3/4 & xfs */ - if (!fs_name || !IS_EXT_FS(fs_name) || strcmp (fs_name, "xfs")) { - ret = 0; - goto out; - } - memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "brick%d.inode_size", brick_index); ret = dict_get_str (dict, key, &inode_size); @@ -467,6 +461,8 @@ cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict, ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesFree", "%"PRIu64, inodes_free); + else + ret = 0; out: gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); diff --git a/cli/src/cli.h b/cli/src/cli.h index 73fb67289f7..8acec640c83 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -170,12 +170,10 @@ struct cli_volume_status { char *pid_str; char *free; char *total; -#ifdef GF_LINUX_HOST_OS char *fs_name; char *mount_options; char *device; char *inode_size; -#endif }; struct snap_config_opt_vals_ { diff --git a/contrib/mount/mntent.c b/contrib/mount/mntent.c index e9b448845a7..9a7e5f39bdb 100644 --- a/contrib/mount/mntent.c +++ b/contrib/mount/mntent.c @@ -36,6 +36,7 @@ */ #if !defined(GF_LINUX_HOST_OS) + #include <stdlib.h> #include <string.h> #include <sys/param.h> @@ -49,166 +50,209 @@ typedef struct statvfs gf_statfs_t; typedef struct statfs gf_statfs_t; #endif -static int pos = -1; -static int mntsize = -1; -static struct mntent _mntent; +typedef struct _mntent_state { + struct mntent mntent; + gf_statfs_t *statfs; + int count; + int pos; + /* A buffer big enough to store all defined flags as a string. + * Increase it if necessary when more flags are defined. */ + char buf[256]; +} mntent_state_t; + +typedef struct _mntflag { + unsigned long value; + const char *on; + const char *off; +} mntflag_t; + +static mntflag_t mntflags[] = { + { MNT_RDONLY, "ro", "rw" }, + { MNT_SYNCHRONOUS, "sync", NULL }, + { MNT_NOEXEC, "noexec", NULL }, + { MNT_NOSUID, "nosuid", NULL }, +#if !defined(__FreeBSD__) + { MNT_NODEV, "nodev", NULL }, +#endif /* __FreeBSD__ */ + { MNT_UNION, "union", NULL }, + { MNT_ASYNC, "async", NULL }, +#if !defined(GF_DARWIN_HOST_OS) + { MNT_NOATIME, "noatime", NULL }, +#if !defined(__NetBSD__) + { MNT_NOCLUSTERR, "noclusterr", NULL }, + { MNT_NOCLUSTERW, "noclusterw", NULL }, + { MNT_NOSYMFOLLOW, "nosymfollow", NULL }, + { MNT_SUIDDIR, "suiddir", NULL }, +#endif /* !__NetBSD__ */ +#endif /* !GF_DARWIN_HOST_OS */ + { 0, NULL, NULL } +}; char * hasmntopt (const struct mntent *mnt, const char *option) { - int found; char *opt, *optbuf; + int len; optbuf = strdup(mnt->mnt_opts); - found = 0; - for (opt = optbuf; (opt = strtok(opt, " ")) != NULL; opt = NULL) { - if (!strcasecmp(opt, option)) { - opt = opt - optbuf + mnt->mnt_opts; - free (optbuf); - return (opt); + if (optbuf == NULL) { + return NULL; + } + + opt = optbuf; + len = 0; + while (*opt) { + while (opt[len] != 0) { + if (opt[len] == ' ') { + opt[len++] = 0; + break; + } + len++; + } + if ((*opt != 0) && (strcasecmp(opt, option) == 0)) { + break; } + opt += len; + len = 0; } - free (optbuf); - return (NULL); + free(optbuf); + if (len == 0) { + return NULL; + } + + return opt - optbuf + mnt->mnt_opts; } -static char * -concatopt (char *s0, const char *s1) +static int +writeopt(const char *text, char *buf, int buflen, int pos) { - size_t i; - char *cp; - - if (s1 == NULL || *s1 == '\0') - return s0; - if (s0 && *s0) { - i = strlen(s0) + strlen(s1) + 1 + 1; - if ((cp = (char *)malloc(i)) == NULL) - return (NULL); - (void)snprintf(cp, i, "%s %s", s0, s1); - } else - cp = strdup(s1); - - if (s0) - free(s0); - return (cp); -} + int len; + + /* buflen must be > 0 */ + + if (text == NULL) { + return pos; + } + + buf += pos; + if (pos > 0) { + /* We are sure we have at least one byte to store the space. + * We don't need to check buflen here. */ + *buf++ = ' '; + pos++; + } + len = strlen(text) + 1; + pos += len; + if (pos >= buflen) { + /* There won't be enough space for the text and the + * terminating null character. We copy as much as we can + * of the text and mark the end of the string with '...' */ + memcpy(buf, text, buflen - pos + len); + if (buflen > 3) { + strcpy(buf + buflen - 4, "..."); + } else { + strncpy(buf, "...", buflen - 1); + buf[buflen - 1] = 0; + } + pos = buflen; + } else { + memcpy(buf, text, len); + } + return pos; +} static char * -flags2opts (int flags) +flags2opts (int flags, char *buf, int buflen) { - char *res; - res = NULL; - res = concatopt(res, (flags & MNT_RDONLY) ? "ro" : "rw"); - if (flags & MNT_SYNCHRONOUS) res = concatopt(res, "sync"); - if (flags & MNT_NOEXEC) res = concatopt(res, "noexec"); - if (flags & MNT_NOSUID) res = concatopt(res, "nosuid"); -#if !defined(__FreeBSD__) - if (flags & MNT_NODEV) res = concatopt(res, "nodev"); -#endif /* __FreeBSD__ */ - if (flags & MNT_UNION) res = concatopt(res, "union"); - if (flags & MNT_ASYNC) res = concatopt(res, "async"); -#if !defined(GF_DARWIN_HOST_OS) - if (flags & MNT_NOATIME) res = concatopt(res, "noatime"); -#if !defined(__NetBSD__) - if (flags & MNT_NOCLUSTERR) res = concatopt(res, "noclusterr"); - if (flags & MNT_NOCLUSTERW) res = concatopt(res, "noclusterw"); - if (flags & MNT_NOSYMFOLLOW) res = concatopt(res, "nosymfollow"); - if (flags & MNT_SUIDDIR) res = concatopt(res, "suiddir"); -#endif /* !__NetBSD__ */ -#endif /* !GF_DARWIN_HOS_OS */ - return res; + char other[16]; + mntflag_t *flg; + int pos; + + if (buflen == 0) { + return NULL; + } + + pos = 0; + for (flg = mntflags; flg->value != 0; flg++) { + pos = writeopt((flags & flg->value) == 0 ? flg->off : flg->on, + buf, buflen, pos); + flags &= ~flg->value; + } + + if (flags != 0) { + sprintf(other, "[0x%x]", flags); + writeopt(other, buf, buflen, pos); + } + + return buf; } -static struct mntent * -statfs_to_mntent (gf_statfs_t *mntbuf) +static void +statfs_to_mntent (struct mntent *mntent, gf_statfs_t *mntbuf, char *buf, + int buflen) { - static char opts_buf[40], *tmp; int f_flags; - _mntent.mnt_fsname = mntbuf->f_mntfromname; - _mntent.mnt_dir = mntbuf->f_mntonname; - _mntent.mnt_type = mntbuf->f_fstypename; + mntent->mnt_fsname = mntbuf->f_mntfromname; + mntent->mnt_dir = mntbuf->f_mntonname; + mntent->mnt_type = mntbuf->f_fstypename; #ifdef __NetBSD__ f_flags = mntbuf->f_flag; #else f_flags = mntbuf->f_flags; #endif - tmp = flags2opts (f_flags); - if (tmp) { - opts_buf[sizeof(opts_buf)-1] = '\0'; - strncpy (opts_buf, tmp, sizeof(opts_buf)-1); - free (tmp); - } else { - *opts_buf = '\0'; - } - _mntent.mnt_opts = opts_buf; - _mntent.mnt_freq = _mntent.mnt_passno = 0; - return (&_mntent); + mntent->mnt_opts = flags2opts (f_flags, buf, buflen); + + mntent->mnt_freq = mntent->mnt_passno = 0; } struct mntent * -getmntent (FILE *fp) +getmntent_r (FILE *fp, struct mntent *mntent, char *buf, int buflen) { - gf_statfs_t *mntbuf; + mntent_state_t *state = (mntent_state_t *)fp; - if (!fp) + if (state->pos >= state->count) { return NULL; - - if (pos == -1 || mntsize == -1) - mntsize = getmntinfo (&mntbuf, MNT_NOWAIT); - - ++pos; - if (pos == mntsize) { - pos = mntsize = -1; - return (NULL); } - return (statfs_to_mntent (&mntbuf[pos])); + statfs_to_mntent(mntent, &state->statfs[state->pos++], buf, buflen); + + return mntent; } -/* - Careful using this function ``buffer`` and ``bufsize`` are - ignored since there is no stream with strings to populate - them on OSX or NetBSD, if one wishes to populate them then - perhaps a new function should be written in this source file - which uses 'getmntinfo()' to stringify the mntent's -*/ - -struct mntent *getmntent_r (FILE *fp, struct mntent *result, - char *buffer, int bufsize) +struct mntent * +getmntent (FILE *fp) { - struct mntent *ment = NULL; - - if (!fp) - return NULL; + mntent_state_t *state = (mntent_state_t *)fp; - flockfile (fp); - ment = getmntent (fp); - memcpy (result, ment, sizeof(*ment)); - funlockfile (fp); - - return result; + return getmntent_r(fp, &state->mntent, state->buf, + sizeof(state->buf)); } FILE * setmntent (const char *filename, const char *type) { - FILE *fp = NULL; -#ifdef GF_DARWIN_HOST_OS - fp = fopen (filename, "w"); -#else - fp = fopen (filename, type); -#endif - return fp; + mntent_state_t *state; + + /* We don't really need to access any file so we'll use the FILE* as + * a fake file to store state information. + */ + + state = malloc(sizeof(mntent_state_t)); + if (state != NULL) { + state->pos = 0; + state->count = getmntinfo(&state->statfs, MNT_NOWAIT); + } + + return (FILE *)state; } int endmntent (FILE *fp) { - if (fp) - fclose (fp); + free(fp); return 1; /* endmntent() always returns 1 */ } diff --git a/doc/release-notes/3.8.9.md b/doc/release-notes/3.8.9.md new file mode 100644 index 00000000000..b3fa0bc1d4f --- /dev/null +++ b/doc/release-notes/3.8.9.md @@ -0,0 +1,27 @@ +# Release notes for Gluster 3.8.9 + +This is a bugfix release. The [Release Notes for 3.8.0](3.8.0.md), +[3.8.1](3.8.1.md), [3.8.2](3.8.2.md), [3.8.3](3.8.3.md), [3.8.4](3.8.4.md), +[3.8.5](3.8.5.md), [3.8.6](3.8.6.md), [3.8.7](3.8.7.md) and [3.8.8](3.8.8.md) +contain a listing of all the new features that were added and bugs fixed in the +GlusterFS 3.8 stable release. + + +## Bugs addressed + +A total of 16 patches have been merged, addressing 14 bugs: + +- [#1410852](https://bugzilla.redhat.com/1410852): glusterfs-server should depend on firewalld-filesystem +- [#1411899](https://bugzilla.redhat.com/1411899): DHT doesn't evenly balance files on FreeBSD with ZFS +- [#1412119](https://bugzilla.redhat.com/1412119): ganesha service crashed on all nodes of ganesha cluster on disperse volume when doing lookup while copying files remotely using scp +- [#1412888](https://bugzilla.redhat.com/1412888): Extra lookup/fstats are sent over the network when a brick is down. +- [#1412913](https://bugzilla.redhat.com/1412913): [ganesha + EC]posix compliance rename tests failed on EC volume with nfs-ganesha mount. +- [#1412915](https://bugzilla.redhat.com/1412915): Spurious split-brain error messages are seen in rebalance logs +- [#1412916](https://bugzilla.redhat.com/1412916): [ganesha+ec]: Contents of original file are not seen when hardlink is created +- [#1412922](https://bugzilla.redhat.com/1412922): ls and move hung on disperse volume +- [#1412941](https://bugzilla.redhat.com/1412941): Regression caused by enabling client-io-threads by default +- [#1414655](https://bugzilla.redhat.com/1414655): Upcall: Possible memleak if inode_ctx_set fails +- [#1415053](https://bugzilla.redhat.com/1415053): geo-rep session faulty with ChangelogException "No such file or directory" +- [#1415132](https://bugzilla.redhat.com/1415132): Improve output of "gluster volume status detail" +- [#1417802](https://bugzilla.redhat.com/1417802): debug/trace: Print iatts of individual entries in readdirp callback for better debugging experience +- [#1420184](https://bugzilla.redhat.com/1420184): [Remove-brick] Hardlink migration fails with "lookup failed (No such file or directory)" error messages in rebalance logs diff --git a/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t new file mode 100644 index 00000000000..4592ebf8d23 --- /dev/null +++ b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t @@ -0,0 +1,66 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST touch $M0/file + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=20 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! dd if=$M0/file of=/dev/null +SOURCE_BRICK_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) + +# Various fav-child policies must not heal the file when some bricks are down. +TEST $CLI volume set $V0 favorite-child-policy size +TEST ! dd if=$M0/file of=/dev/null +TEST $CLI volume set $V0 favorite-child-policy ctime +TEST ! dd if=$M0/file of=/dev/null +TEST $CLI volume set $V0 favorite-child-policy mtime +TEST ! dd if=$M0/file of=/dev/null +TEST $CLI volume set $V0 favorite-child-policy majority +TEST ! dd if=$M0/file of=/dev/null + +# CLI/mount based split-brain resolution must also not work. +TEST ! $CLI volume heal $V0 split-brain bigger-file /file +TEST ! $CLI volume heal $V0 split-brain mtime /file +TEST ! $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file1 + +TEST ! getfattr -n replica.split-brain-status $M0/file +TEST ! setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/file + +# Bring all bricks back up and launch heal. +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT 0 get_pending_heal_count $V0 +B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1) +TEST [ "$SOURCE_BRICK_MD5" == "$B1_MD5" ] +TEST [ "$SOURCE_BRICK_MD5" == "$B2_MD5" ] + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index fb3318da36a..ebadba99a05 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -668,14 +668,17 @@ afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque) gf_boolean_t timer_reset = _gf_false; int old_spb_choice = -1; - if (ret) - goto out; - frame = data->frame; loc = data->loc; this = frame->this; priv = this->private; + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + delta.tv_sec = priv->spb_choice_timeout; delta.tv_nsec = 0; @@ -5880,6 +5883,12 @@ afr_is_split_brain (call_frame_t *frame, xlator_t *this, inode_t *inode, if (ret) goto out; + if (!afr_can_decide_split_brain_source_sinks (replies, + priv->child_count)) { + ret = -EAGAIN; + goto out; + } + ret = _afr_is_split_brain (frame, this, replies, AFR_DATA_TRANSACTION, d_spb); if (ret) @@ -5932,6 +5941,13 @@ afr_get_split_brain_status (void *opaque) if (!inode) goto out; + dict = dict_new (); + if (!dict) { + op_errno = ENOMEM; + ret = -1; + goto out; + } + /* Calculation for string length : * (child_count X length of child-name) + strlen (" Choices :") * child-name consists of : @@ -5945,13 +5961,9 @@ afr_get_split_brain_status (void *opaque) &m_spb); if (ret) { op_errno = -ret; - ret = -1; - goto out; - } - - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; + if (ret == -EAGAIN) + ret = dict_set_str (dict, GF_AFR_SBRAIN_STATUS, + SBRAIN_HEAL_NO_GO_MSG); ret = -1; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 40cfd01f4e9..c11ca11fdd9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -472,6 +472,19 @@ afr_dict_contains_heal_op (call_frame_t *frame) return _gf_true; } +gf_boolean_t +afr_can_decide_split_brain_source_sinks (struct afr_reply *replies, + int child_count) +{ + int i = 0; + + for (i = 0; i < child_count; i++) + if (replies[i].valid != 1 || replies[i].op_ret != 0) + return _gf_false; + + return _gf_true; +} + int afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame, xlator_t *this, unsigned char *sources, @@ -510,6 +523,14 @@ afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame, } xdata_rsp = local->xdata_rsp; + if (!afr_can_decide_split_brain_source_sinks (replies, + priv->child_count)) { + ret = dict_set_str (xdata_rsp, "sh-fail-msg", + SBRAIN_HEAL_NO_GO_MSG); + ret = -1; + goto out; + } + for (i = 0 ; i < priv->child_count; i++) if (locked_on[i]) sources[i] = 1; @@ -748,26 +769,35 @@ afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, int fav_child = -1; priv = this->private; + if (!afr_can_decide_split_brain_source_sinks (replies, + priv->child_count)) { + return -1; + } + switch (priv->fav_child_policy) { case AFR_FAV_CHILD_BY_SIZE: fav_child = afr_sh_fav_by_size (this, replies, inode); - if (policy_str && fav_child >= 0) + if (policy_str && fav_child >= 0) { *policy_str = "SIZE"; + } break; case AFR_FAV_CHILD_BY_CTIME: fav_child = afr_sh_fav_by_ctime (this, replies, inode); - if (policy_str && fav_child >= 0) + if (policy_str && fav_child >= 0) { *policy_str = "CTIME"; + } break; case AFR_FAV_CHILD_BY_MTIME: fav_child = afr_sh_fav_by_mtime (this, replies, inode); - if (policy_str && fav_child >= 0) + if (policy_str && fav_child >= 0) { *policy_str = "MTIME"; + } break; case AFR_FAV_CHILD_BY_MAJORITY: fav_child = afr_sh_fav_by_majority (this, replies, inode); - if (policy_str && fav_child >= 0) + if (policy_str && fav_child >= 0) { *policy_str = "MAJORITY"; + } break; case AFR_FAV_CHILD_NONE: default: diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 500227abe24..a33905033cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -81,7 +81,8 @@ #define IA_EQUAL(f,s,field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0) - +#define SBRAIN_HEAL_NO_GO_MSG "Failed to obtain replies from all bricks of "\ + "the replica (are they up?). Cannot resolve split-brain." int afr_selfheal (xlator_t *this, uuid_t gfid); @@ -220,6 +221,9 @@ afr_mark_active_sinks (xlator_t *this, unsigned char *sources, gf_boolean_t afr_dict_contains_heal_op (call_frame_t *frame); +gf_boolean_t +afr_can_decide_split_brain_source_sinks (struct afr_reply *replies, + int child_count); int afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, inode_t *inode, diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index ebc8a9c2492..ffd8bac9e4f 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1835,32 +1835,8 @@ rebalance_task (void *data) static int rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) { - int ret = -1; - uint64_t layout_int = 0; - dht_layout_t *layout = 0; - xlator_t *this = NULL; - dht_local_t *local = NULL; int32_t op_errno = EINVAL; - this = THIS; - local = sync_frame->local; - - if (!op_ret) { - /* Make sure we have valid 'layout' in inode ctx - after the operation */ - ret = inode_ctx_del (local->loc.inode, this, &layout_int); - if (!ret && layout_int) { - layout = (dht_layout_t *)(long)layout_int; - dht_layout_unref (this, layout); - } - - ret = dht_layout_preset (this, local->rebalance.target_node, - local->loc.inode); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set inode ctx", local->loc.path); - } - if (op_ret == -1) { /* Failure of migration process, mostly due to write process. as we can't preserve the exact errno, lets say there was diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c index 03e92184dcd..555147aec47 100644 --- a/xlators/debug/trace/src/trace.c +++ b/xlators/debug/trace/src/trace.c @@ -294,14 +294,17 @@ trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata) { - trace_conf_t *conf = NULL; + int count = 0; + char statstr[4096] = {0,}; + char string[4096] = {0,}; + trace_conf_t *conf = NULL; + gf_dirent_t *entry = NULL; conf = this->private; if (!conf->log_file && !conf->log_history) goto out; if (trace_fop_names[GF_FOP_READDIRP].enabled) { - char string[4096] = {0,}; snprintf (string, sizeof (string), "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d", frame->root->unique, uuid_utoa (frame->local), @@ -309,6 +312,17 @@ trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOG_ELEMENT (conf, string); } + if (op_ret < 0) + goto out; + + list_for_each_entry (entry, &buf->list, list) { + count++; + TRACE_STAT_TO_STR (&entry->d_stat, statstr); + snprintf (string, sizeof (string), "entry no. %d, pargfid=%s, " + "bname=%s *buf {%s}", count, uuid_utoa (frame->local), + entry->d_name, statstr); + LOG_ELEMENT (conf, string); + } out: TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata); diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 0cb68587e57..5c47f5e0303 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -620,7 +620,10 @@ htime_open (xlator_t *this, unsigned long min_ts = 0; unsigned long max_ts = 0; unsigned long total = 0; + unsigned long total1 = 0; ssize_t size = 0; + struct stat stat_buf = {0,}; + unsigned long record_len = 0; CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); @@ -681,6 +684,16 @@ htime_open (xlator_t *this, /* save this htime_fd in priv->htime_fd */ priv->htime_fd = ht_file_fd; + ret = sys_fstat (ht_file_fd, &stat_buf); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_HTIME_ERROR, + "unable to stat htime file: %s", + ht_file_path); + ret = -1; + goto out; + } + /* Initialize rollover-number in priv to current number */ size = sys_fgetxattr (ht_file_fd, HTIME_KEY, x_value, sizeof (x_value)); if (size < 0) { @@ -693,11 +706,27 @@ htime_open (xlator_t *this, } sscanf (x_value, "%lu:%lu", &max_ts, &total); + + /* 22 = 1(/) + 20(CHANGELOG.TIMESTAMP) + 1(\x00) */ + record_len = strlen(priv->changelog_dir) + 22; + total1 = stat_buf.st_size/record_len; + if (total != total1) { + gf_msg (this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_TOTAL_LOG_INFO, + "Mismatch of changelog count. " + "INIT CASE: XATTR TOTAL: %lu, SIZE TOTAL: %lu", + total, total1); + } + gf_msg (this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "INIT CASE: MIN: %lu, MAX: %lu," " TOTAL CHANGELOGS: %lu", min_ts, max_ts, total); - priv->rollover_count = total + 1; + + if (total < total1) + priv->rollover_count = total1 + 1; + else + priv->rollover_count = total + 1; out: if (ht_dir_fd != -1) diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 66cbddf5fa9..37fc3dbc35a 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -204,6 +204,7 @@ __upcall_inode_ctx_set (inode_t *inode, xlator_t *this) if (ret) { gf_log (this->name, GF_LOG_DEBUG, "failed to set inode ctx (%p)", inode); + GF_FREE (inode_ctx); goto out; } diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c index bff7e0669ff..57d1874c79a 100644 --- a/xlators/nfs/server/src/mount3.c +++ b/xlators/nfs/server/src/mount3.c @@ -2544,8 +2544,6 @@ __mnt3svc_umountall (struct mount3_state *ms) GF_FREE (me); } - dict_unref (ms->mountdict); - return 0; } @@ -3939,7 +3937,13 @@ mnt3svc_deinit (xlator_t *nfsx) mnt3_auth_params_deinit (mstate->auth_params); /* Unmount everything and clear mountdict */ - mnt3svc_umountall (mstate); + LOCK (&mstate->mountlock); + { + __mnt3svc_umountall (mstate); + dict_unref (mstate->mountdict); + } + UNLOCK (&mstate->mountlock); + } rpcsvc_program_t * |
