diff options
228 files changed, 6827 insertions, 2651 deletions
diff --git a/.gitignore b/.gitignore index 849a0f1a8be..fc5ba586f8e 100644 --- a/.gitignore +++ b/.gitignore @@ -121,3 +121,5 @@ xlators/features/cloudsync/src/cloudsync-autogen-fops.c xlators/features/cloudsync/src/cloudsync-autogen-fops.h xlators/features/utime/src/utime-autogen-fops.c xlators/features/utime/src/utime-autogen-fops.h +tests/basic/metadisp/ftruncate +xlators/features/metadisp/src/fops.c diff --git a/.testignore b/.testignore index 1784aff629a..fe8f838bf2b 100644 --- a/.testignore +++ b/.testignore @@ -8,7 +8,7 @@ rfc.sh submit-for-review.sh AUTHORS -CONTRIBUTING +CONTRIBUTING.md COPYING-GPLV2 COPYING-LGPLV3 ChangeLog diff --git a/CONTRIBUTING b/CONTRIBUTING deleted file mode 100644 index eb30d76d1b4..00000000000 --- a/CONTRIBUTING +++ /dev/null @@ -1,30 +0,0 @@ -# GlusterFS project Contribution guidelines - - -## By contributing to this project, the contributor would need to agree to below. - -### Developer's Certificate of Origin 1.1 - -By making a contribution to this project, I certify that: - -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..65fc3497104 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,114 @@ +# GlusterFS project Contribution guidelines + +## Development Workflow + +We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here. + + +#### Get the Repository setup + +0. Fork Repository + - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork). + +1. Clone Repository + - Clone the glusterfs repo freshly from github using below steps. + +``` + git clone git@github.com:${username}/glusterfs.git + cd glusterfs/ + git remote add upstream git@github.com:gluster/glusterfs.git +``` + +About two tasks are one time for the life time. You can continue to use the same repository for all the work in future. + +#### Development & Other flows + +0. Issue: + - Make sure there is an issue filed for the task you are working on. + - If it is not filed, open the issue with all the description. + - If it is a bug fix, add label "Type:Bug". + - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label. + +1. Code: + - Start coding + - Build and test locally + - Make sure clang-format is installed and is run on the patch. + +2. Keep up-to-date + - GlusterFS is a large project with many developers, so there would be one or the other patch everyday. + - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened. + - Git provides many options to keep up-to-date, below is one of them +``` + git fetch upstream + git rebase upstream/devel +``` + - It is recommended you keep pushing to your repo every day, so you don't loose any work. + - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`) + +2. Commit Message / PR description: + - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice. + - PRs continue to have the title of format "component: \<title\>", like it is practiced now. + - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS. + - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message. + - Here, NNNN is the Issue ID in glusterfs repository. + - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line. + - Can do this by `-s` option for `git commit`. + - If the PR is not ready for review, apply the label `work-in-progress`. + - Check the availability of "Draft PR" is present for you, if yes, use that instead. + +3. Tests: + - All the required smoke tests would be auto-triggered. + - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment. + - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group. + - Ask for help as comment in PR if you have any questions about the process! + +4. Review Process: + - `+2` : is equivalent to "Approve" from the people in the maintainer's group. + - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment. + - `-1` : provide details on required changes and pick "Request Changes" while submitting your review. + - `-2` : done by adding the `DO-NOT-MERGE` label. + + - Any further discussions can happen as comments in the PR. + +5. Making changes: + - There are 2 approaches to submit changes done after addressing review comments. + - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN) + - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option. + +6. Merging: + - GlusterFS project follows 'Squash and Merge' method + - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link. + - This also makes every merge a complete patch, which has passed all tests. + - The merging of the patch is expected to be done by the maintainers. + - It can be done when all the tests (smoke and regression) pass. + - When the PR has 'Approved' flag from corresponding maintainer. + - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email. + +## By contributing to this project, the contributor would need to agree to below. + +### Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + diff --git a/MAINTAINERS b/MAINTAINERS index cfe075fa17d..953e8755fd9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -53,7 +53,7 @@ General Project Architects -------------------------- M: Amar Tumballi <amarts@gmail.com> M: Xavier Hernandez <xhernandez@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Atin Mukherjee <amukherj@redhat.com> xlators: @@ -66,12 +66,12 @@ F: xlators/system/posix-acl/ Arbiter M: Ravishankar N <ravishankar@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> S: Maintained F: xlators/features/arbiter/ Automatic File Replication (AFR) -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> M: Ravishankar N <ravishankar@redhat.com> P: Karthik US <ksubrahm@redhat.com> S: Maintained @@ -101,7 +101,7 @@ S: Maintained F: xlators/cluster/dht/ Erasure Coding -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> M: Xavier Hernandez <xhernandez@redhat.com> P: Ashish Pandey <aspandey@redhat.com> S: Maintained @@ -119,13 +119,13 @@ S: Maintained F: xlators/mount/ Index -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Ravishankar N <ravishankar@redhat.com> S: Maintained F: xlators/features/index/ IO Cache -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/performance/io-cache/ @@ -136,7 +136,7 @@ S: Maintained F: xlators/debug/io-stats/ IO threads -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Ravishankar N <ravishankar@redhat.com> S: Maintained F: xlators/performance/io-threads/ @@ -160,7 +160,7 @@ S: Maintained F: xlators/features/marker/ Meta -M: Mohammed Rafi KC <rkavunga@redhat.com> +M: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/features/meta/ @@ -172,7 +172,7 @@ F: xlators/performance/md-cache/ Negative-lookup Cache M: Poornima G <pgurusid@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> S: Maintained F: xlators/performance/nl-cache/ @@ -282,7 +282,7 @@ M: Xavier Hernandez <xhernandez@redhat.com> M: Jeff Darcy <jeff@pl.atyp.us> P: Kaleb Keithley <kkeithle@redhat.com> P: Niels de Vos <ndevos@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Shyamsundar Ranganathan <srangana@redhat.com> S: Maintained F: libglusterfs/ @@ -305,7 +305,7 @@ F: xlators/mgmt/glusterd/ Protocol M: Niels de Vos <ndevos@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/protocol/ @@ -317,7 +317,7 @@ F: rpc/xdr/ Snapshot M: Raghavendra Bhat <raghavendra@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> P: Sunny Kumar <sunkumar@redhat.com> S: Maintained F: xlators/mgmt/glusterd/src/glusterd-snap* @@ -326,7 +326,7 @@ F: extras/snap-scheduler.py Socket subsystem P: Krutika Dhananjay <kdhananj@redhat.com> P: Milind Changire <mchangir@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> P: Mohit Agrawal <moagrawa@redhat.com> S: Maintained F: rpc/rpc-transport/socket/ diff --git a/Makefile.am b/Makefile.am index aa0ca5c2a04..98ea5c1038d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,9 +19,13 @@ pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile +clean-local: + find . -name '*.o' -o -name '*.lo' -o -name '.Po' | xargs rm -f + gitclean: distclean find . -name Makefile.in -exec rm -f {} \; find . -name mount.glusterfs -exec rm -f {} \; + find . -name .deps -o -name .libs | xargs rm -rf rm -fr autom4te.cache rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp @@ -49,4 +53,3 @@ gen-VERSION: ./build-aux/pkg-version --full \ > $(abs_top_builddir)/$(distdir)/VERSION; \ fi - diff --git a/README.md b/README.md index 92f829e431e..9d68e033782 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ petabytes. It provides interfaces for object, block and file storage. ## Development - Contributions to gluster in the form of patches and new feature additions can - be made by following steps outlined at [Developers Guide](http://docs.gluster.org/en/latest/Developer-guide/Developers-Index/#contributing-to-the-gluster-community). + The development workflow is documented in [Contributors guide](CONTRIBUTING.md) ## Documentation The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org). diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index fc2cfa04309..6aa3c5602d1 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -1100,6 +1100,11 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags) }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = buf; iov.iov_len = count; @@ -1151,6 +1156,11 @@ pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = pub_glfs_preadv(glfd, iov, count, glfd->offset, flags); return ret; @@ -1387,6 +1397,11 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags, }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = buf; iov.iov_len = count; @@ -1406,6 +1421,11 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags, }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = buf; iov.iov_len = count; @@ -1460,6 +1480,11 @@ pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags, _gf_true, (void *)fn, data); return ret; @@ -1472,6 +1497,11 @@ pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags, _gf_false, fn, data); return ret; @@ -1733,6 +1763,11 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags) }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = (void *)buf; iov.iov_len = count; @@ -1748,6 +1783,11 @@ pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = pub_glfs_pwritev(glfd, iov, count, glfd->offset, flags); return ret; @@ -1938,6 +1978,11 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count, }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = (void *)buf; iov.iov_len = count; @@ -1957,6 +2002,11 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count, }; ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + iov.iov_base = (void *)buf; iov.iov_len = count; @@ -2011,6 +2061,11 @@ pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags, _gf_true, (void *)fn, data); return ret; @@ -2023,6 +2078,11 @@ pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count, { ssize_t ret = 0; + if (glfd == NULL) { + errno = EBADF; + return -1; + } + ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags, _gf_false, fn, data); return ret; @@ -3334,6 +3394,11 @@ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0) long pub_glfs_telldir(struct glfs_fd *fd) { + if (fd == NULL) { + errno = EBADF; + return -1; + } + return fd->offset; } @@ -3344,6 +3409,11 @@ pub_glfs_seekdir(struct glfs_fd *fd, long offset) gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; + if (fd == NULL) { + errno = EBADF; + return; + } + if (fd->offset == offset) return; diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c index 08df2b1a4b7..7c82b8cd162 100644 --- a/api/src/glfs-mgmt.c +++ b/api/src/glfs-mgmt.c @@ -1013,11 +1013,6 @@ glfs_mgmt_init(struct glfs *fs) if (ret) goto out; - if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) { - ctx->secure_mgmt = 1; - ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); - } - rpc = rpc_clnt_new(options, THIS, THIS->name, 8); if (!rpc) { ret = -1; diff --git a/api/src/glfs.c b/api/src/glfs.c index 5259356b4c5..b4bf1423f6d 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -251,6 +251,11 @@ glfs_volumes_init(struct glfs *fs) if (!vol_assigned(cmd_args)) return -1; + if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) { + fs->ctx->secure_mgmt = 1; + fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); + } + if (cmd_args->volfile_server) { ret = glfs_mgmt_init(fs); goto out; @@ -650,6 +655,11 @@ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0) struct glfs * pub_glfs_from_glfd(struct glfs_fd *glfd) { + if (glfd == NULL) { + errno = EBADF; + return NULL; + } + return glfd->fs; } diff --git a/configure.ac b/configure.ac index 1725301d524..e2d6fd66cec 100644 --- a/configure.ac +++ b/configure.ac @@ -164,6 +164,8 @@ AC_CONFIG_FILES([Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile + xlators/features/metadisp/Makefile + xlators/features/metadisp/src/Makefile xlators/playground/Makefile xlators/playground/template/Makefile xlators/playground/template/src/Makefile @@ -273,7 +275,7 @@ AC_ARG_ENABLE([debug], [Enable debug build options.])) if test "x$enable_debug" = "xyes"; then BUILD_DEBUG=yes - GF_CFLAGS="${GF_CFLAGS} -g -rdynamic -O0 -DDEBUG" + GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG" else BUILD_DEBUG=no fi @@ -407,12 +409,27 @@ esac # --enable-valgrind prevents calling dlclose(), this leaks memory AC_ARG_ENABLE([valgrind], - AC_HELP_STRING([--enable-valgrind], - [Enable valgrind for resource leak debugging.])) -if test "x$enable_valgrind" = "xyes"; then - AC_DEFINE(RUN_WITH_VALGRIND, 1, [define if all processes should run under valgrind]) -fi - + AC_HELP_STRING([--enable-valgrind@<:@=memcheck,drd@:>@], + [Enable valgrind for resource leak (memcheck, which is + the default) or thread synchronization (drd) debugging.])) +case x$enable_valgrind in + xmemcheck|xyes) + AC_DEFINE(RUN_WITH_MEMCHECK, 1, + [Define if all processes should run under 'valgrind --tool=memcheck'.]) + VALGRIND_TOOL=memcheck + ;; + xdrd) + AC_DEFINE(RUN_WITH_DRD, 1, + [Define if all processes should run under 'valgrind --tool=drd'.]) + VALGRIND_TOOL=drd + ;; + x|xno) + VALGRIND_TOOL=no + ;; + *) + AC_MSG_ERROR([Please specify --enable-valgrind@<:@=memcheck,drd@:>@]) + ;; +esac AC_ARG_WITH([previous-options], [AS_HELP_STRING([--with-previous-options], @@ -810,6 +827,17 @@ fi AC_SUBST(GEOREP_EXTRAS_SUBDIR) AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno") +# METADISP section +AC_ARG_ENABLE([metadisp], + AC_HELP_STRING([--enable-metadisp], + [Enable the metadata dispersal xlator])) +BUILD_METADISP=no +if test "x${enable_metadisp}" = "xyes"; then + BUILD_METADISP=yes +fi +AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"]) +# end METADISP section + # Events section AC_ARG_ENABLE([events], AC_HELP_STRING([--disable-events], @@ -1562,9 +1590,9 @@ case $host_os in ;; esac dnl GF_XLATOR_DEFAULT_LDFLAGS is for most xlators that expose a common set of symbols -GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED)' +GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)' dnl GF_XLATOR_LDFLAGS is for xlators that expose extra symbols, e.g. dht -GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED)' +GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)' AC_SUBST(GF_HOST_OS) AC_SUBST(GF_CFLAGS) @@ -1579,6 +1607,13 @@ AC_SUBST(AM_LIBTOOLFLAGS) AC_SUBST(GF_NO_UNDEFINED) AC_SUBST(GF_XLATOR_DEFAULT_LDFLAGS) AC_SUBST(GF_XLATOR_LDFLAGS) +AC_SUBST(GF_XLATOR_MGNT_LIBADD) + +case $host_os in + *freebsd*) + GF_XLATOR_MGNT_LIBADD="-lutil -lprocstat" + ;; +esac CONTRIBDIR='$(top_srcdir)/contrib' AC_SUBST(CONTRIBDIR) @@ -1650,6 +1685,7 @@ echo "readline : $BUILD_READLINE" echo "georeplication : $BUILD_SYNCDAEMON" echo "Linux-AIO : $BUILD_LIBAIO" echo "Enable Debug : $BUILD_DEBUG" +echo "Run with Valgrind : $VALGRIND_TOOL" echo "Sanitizer enabled : $SANITIZER" echo "Use syslog : $USE_SYSLOG" echo "XML output : $BUILD_XML_OUTPUT" @@ -1668,6 +1704,7 @@ echo "IPV6 default : $with_ipv6_default" echo "Use TIRPC : $with_libtirpc" echo "With Python : ${PYTHON_VERSION}" echo "Cloudsync : $BUILD_CLOUDSYNC" +echo "Metadata dispersal : $BUILD_METADISP" echo "Link with TCMALLOC : $BUILD_TCMALLOC" echo diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c index 351972ce3cf..06ff191f542 100644 --- a/contrib/fuse-lib/mount.c +++ b/contrib/fuse-lib/mount.c @@ -390,6 +390,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, build_iovec (&iov, &iovlen, "from", "/dev/fuse", -1); build_iovec (&iov, &iovlen, "volname", source, -1); build_iovec (&iov, &iovlen, "fd", fdstr, -1); + build_iovec (&iov, &iovlen, "allow_other", NULL, -1); ret = nmount (iov, iovlen, mountflags); #else ret = mount (source, mountpoint, fstype, mountflags, diff --git a/doc/developer-guide/identifying-resource-leaks.md b/doc/developer-guide/identifying-resource-leaks.md index 851fc4424bc..950cae79b0a 100644 --- a/doc/developer-guide/identifying-resource-leaks.md +++ b/doc/developer-guide/identifying-resource-leaks.md @@ -174,3 +174,27 @@ In this case, the resource leak can be addressed by adding a single line to the Running the same Valgrind command and comparing the output will show that the memory leak in `xlators/meta/src/meta.c:init` is not reported anymore. + +### Running DRD, the Valgrind thread error detector + +When configuring GlusterFS with: + +```shell +./configure --enable-valgrind +``` + +the default Valgrind tool (Memcheck) is enabled. But it's also possble to select +one of Memcheck or DRD by using: + +```shell +./configure --enable-valgrind=memcheck +``` + +or: + +```shell +./configure --enable-valgrind=drd +``` + +respectively. When using DRD, it's recommended to consult +https://valgrind.org/docs/manual/drd-manual.html before running. diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in index 76b5954d325..700093bee60 100644 --- a/events/src/eventsapiconf.py.in +++ b/events/src/eventsapiconf.py.in @@ -28,6 +28,8 @@ def get_glusterd_workdir(): return glusterd_workdir SERVER_ADDRESS = "0.0.0.0" +SERVER_ADDRESSv4 = "0.0.0.0" +SERVER_ADDRESSv6 = "::1" DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json" CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json" CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py index c4c7b65e332..341a3b60947 100644 --- a/events/src/glustereventsd.py +++ b/events/src/glustereventsd.py @@ -13,6 +13,7 @@ from __future__ import print_function import sys import signal +import threading try: import socketserver except ImportError: @@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter from eventtypes import all_events import handlers import utils -from eventsapiconf import SERVER_ADDRESS, PID_FILE +from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES from utils import logger, PidFile, PidFileLockFailed, boolify +# Subclass so that specifically IPv4 packets are captured +class UDPServerv4(socketserver.ThreadingUDPServer): + address_family = socket.AF_INET + +# Subclass so that specifically IPv6 packets are captured +class UDPServerv6(socketserver.ThreadingUDPServer): + address_family = socket.AF_INET6 class GlusterEventsRequestHandler(socketserver.BaseRequestHandler): @@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame): utils.restart_webhook_pool() +def UDP_server_thread(sock): + sock.serve_forever() + + def init_event_server(): utils.setup_logger() utils.load_all() @@ -99,15 +111,26 @@ def init_event_server(): sys.stderr.write("Unable to get Port details from Config\n") sys.exit(1) - # Start the Eventing Server, UDP Server + # Creating the Eventing Server, UDP Server for IPv4 packets + try: + serverv4 = UDPServerv4((SERVER_ADDRESSv4, port), + GlusterEventsRequestHandler) + except socket.error as e: + sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e)) + sys.exit(1) + # Creating the Eventing Server, UDP Server for IPv6 packets try: - server = socketserver.ThreadingUDPServer( - (SERVER_ADDRESS, port), - GlusterEventsRequestHandler) + serverv6 = UDPServerv6((SERVER_ADDRESSv6, port), + GlusterEventsRequestHandler) except socket.error as e: - sys.stderr.write("Failed to start Eventsd: {0}\n".format(e)) + sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e)) sys.exit(1) - server.serve_forever() + server_thread1 = threading.Thread(target=UDP_server_thread, + args=(serverv4,)) + server_thread2 = threading.Thread(target=UDP_server_thread, + args=(serverv6,)) + server_thread1.start() + server_thread2.start() def get_args(): diff --git a/events/src/utils.py b/events/src/utils.py index 38b707a1b28..6d4e0791a2b 100644 --- a/events/src/utils.py +++ b/events/src/utils.py @@ -13,6 +13,7 @@ import sys import json import os import logging +import logging.handlers import fcntl from errno import EBADF from threading import Thread @@ -98,7 +99,7 @@ def setup_logger(): logger.setLevel(logging.INFO) # create the logging file handler - fh = logging.FileHandler(LOG_FILE) + fh = logging.handlers.WatchedFileHandler(LOG_FILE) formatter = logging.Formatter("[%(asctime)s] %(levelname)s " "[%(module)s - %(lineno)s:%(funcName)s] " diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py index 7bfb6c9652a..5a07e2feab1 100755 --- a/extras/distributed-testing/distributed-test-runner.py +++ b/extras/distributed-testing/distributed-test-runner.py @@ -383,14 +383,17 @@ class Handlers: return self.shell.call("make install") == 0 @synchronized - def prove(self, id, test, timeout, valgrind=False, asan_noleaks=True): + def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True): assert id == self.client_id self.shell.cd(self.gluster_root) env = "DEBUG=1 " - if valgrind: + if valgrind == "memcheck" or valgrind == "yes": cmd = "valgrind" cmd += " --tool=memcheck --leak-check=full --track-origins=yes" cmd += " --show-leak-kinds=all -v prove -v" + elif valgrind == "drd": + cmd = "valgrind" + cmd += " --tool=drd -v prove -v" elif asan_noleaks: cmd = "prove -v" env += "ASAN_OPTIONS=detect_leaks=0 " @@ -827,8 +830,9 @@ parser.add_argument("--port", help="server port to listen", type=int, default=DEFAULT_PORT) # test role parser.add_argument("--tester", help="start tester", action="store_true") -parser.add_argument("--valgrind", help="run tests under valgrind", - action="store_true") +parser.add_argument("--valgrind[=memcheck,drd]", + help="run tests with valgrind tool 'memcheck' or 'drd'", + default="no") parser.add_argument("--asan", help="test with asan enabled", action="store_true") parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks", diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd index 93fc8be5136..f91e8b6b8f7 100644 --- a/extras/ganesha/ocf/ganesha_nfsd +++ b/extras/ganesha/ocf/ganesha_nfsd @@ -36,7 +36,7 @@ else . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs fi -OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" +OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" : ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} ganesha_meta_data() { diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh index a6814b17935..9790a719e10 100644 --- a/extras/ganesha/scripts/ganesha-ha.sh +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0) HA_NUM_SERVERS=0 HA_SERVERS="" HA_VOL_NAME="gluster_shared_storage" -HA_VOL_MNT="/var/run/gluster/shared_storage" +HA_VOL_MNT="/run/gluster/shared_storage" HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" SERVICE_MAN="DISTRO_NOT_FOUND" diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh index 885ed03ad5b..1f2564b44ff 100755 --- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh @@ -79,9 +79,9 @@ done if [ "$option" == "disable" ]; then # Unmount the volume on all the nodes - umount /var/run/gluster/shared_storage - cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + umount /run/gluster/shared_storage + cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi if [ "$is_originator" == 1 ]; then @@ -104,8 +104,15 @@ function check_volume_status() echo $status } -mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ - /var/run/gluster/shared_storage" +key=`echo $5 | cut -d '=' -f 1` +val=`echo $5 | cut -d '=' -f 2` +if [ "$key" == "transport.address-family" ]; then + mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ + $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage" +else + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ + /run/gluster/shared_storage" +fi if [ "$option" == "enable" ]; then retry=0; @@ -120,10 +127,10 @@ if [ "$option" == "enable" ]; then status=$(check_volume_status) done # Mount the volume on all the nodes - umount /var/run/gluster/shared_storage - mkdir -p /var/run/gluster/shared_storage + umount /run/gluster/shared_storage + mkdir -p /run/gluster/shared_storage $mount_cmd - cp /etc/fstab /var/run/gluster/fstab.tmp - echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + cp /etc/fstab /run/gluster/fstab.tmp + echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh index 90ba6bc73a5..7ad6f23ad06 100755 --- a/extras/hook-scripts/start/post/S31ganesha-start.sh +++ b/extras/hook-scripts/start/post/S31ganesha-start.sh @@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:" VOL= declare -i EXPORT_ID ganesha_key="ganesha.enable" -GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" +GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" CONF1="$GANESHA_DIR/ganesha.conf" GLUSTERD_WORKDIR= diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py index 174f2a2a44c..e62f7fc52a3 100755 --- a/extras/quota/quota_fsck.py +++ b/extras/quota/quota_fsck.py @@ -156,12 +156,10 @@ def get_quota_xattr_brick(dpath): xattr_dict = {} xattr_dict['parents'] = {} - for xattr in pairs: + for xattr in pairs[1:]: + xattr = xattr.decode("utf-8") xattr_key = xattr.split("=")[0] - if re.search("# file:", xattr_key): - # skip the file comment - continue - elif xattr_key is "": + if xattr_key == "": # skip any empty lines continue elif not re.search("quota", xattr_key): diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py index cc16310e31d..0e4df77d481 100755 --- a/extras/snap_scheduler/gcron.py +++ b/extras/snap_scheduler/gcron.py @@ -19,10 +19,10 @@ import logging.handlers import fcntl -GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" +GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" -LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" +LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" log = logging.getLogger("gcron-logger") start_time = 0.0 diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py index 5a29d41c8f8..e8fcc449a9b 100755 --- a/extras/snap_scheduler/snap_scheduler.py +++ b/extras/snap_scheduler/snap_scheduler.py @@ -67,7 +67,7 @@ except ImportError: SCRIPT_NAME = "snap_scheduler" scheduler_enabled = False log = logging.getLogger(SCRIPT_NAME) -SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" +SHARED_STORAGE_DIR="/run/gluster/shared_storage" GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in index 11e57fdf54f..9688c79fab7 100644 --- a/geo-replication/gsyncd.conf.in +++ b/geo-replication/gsyncd.conf.in @@ -123,7 +123,7 @@ type=bool help=Use this to set Active Passive mode to meta-volume. [meta-volume-mnt] -value=/var/run/gluster/shared_storage +value=/run/gluster/shared_storage help=Meta Volume or Shared Volume mount path [allow-network] diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index b3f3fd8a936..a3df103e76c 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -725,7 +725,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): if not isinstance(realpath, int): basename = os.path.basename(realpath).rstrip('\x00') dirpath = os.path.dirname(realpath) - if dirpath is "/": + if dirpath == "/": pargfid = ROOT_GFID else: dirpath = dirpath.strip("/") diff --git a/glusterfs.spec.in b/glusterfs.spec.in index cf4ab55fb2b..b6d63146e14 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -706,6 +706,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}-cli%{?_isa} = %{version}-%{release} Requires: libglusterfs0%{?_isa} = %{version}-%{release} Requires: libgfchangelog0%{?_isa} = %{version}-%{release} +%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +Requires: glusterfs-selinux >= 0.1.0-2 +%endif # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse Requires: %{name}-fuse%{?_isa} = %{version}-%{release} # self-heal daemon, rebalance, nfs-server etc. are actually clients diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am index 804461ca9df..a0a778158d8 100644 --- a/glusterfsd/src/Makefile.am +++ b/glusterfsd/src/Makefile.am @@ -14,6 +14,7 @@ gf_attach_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/api/src/libgfapi.la \ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la +gf_attach_LDFLAGS = $(GF_LDFLAGS) noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h glusterfsd-messages.h diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c index c54b4f9b516..c553b0b1f61 100644 --- a/glusterfsd/src/gf_attach.c +++ b/glusterfsd/src/gf_attach.c @@ -99,7 +99,7 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op) iov.iov_len = ret; /* Wait for connection */ - clock_gettime(CLOCK_REALTIME, &ts); + timespec_now_realtime(&ts); ts.tv_sec += CONNECT_TIMEOUT; pthread_mutex_lock(&rpc->conn.lock); { @@ -124,7 +124,7 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op) 0, iobref, frame, NULL, 0, NULL, 0, NULL); if (!ret) { /* OK, wait for callback */ - clock_gettime(CLOCK_REALTIME, &ts); + timespec_now_realtime(&ts); ts.tv_sec += REPLY_TIMEOUT; pthread_mutex_lock(&mutex); { diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 793f27ab67f..eaf6796e4c3 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -629,7 +629,7 @@ glusterfs_volume_top_perf(const char *brick_path, dict_t *dict, goto out; } - time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec); + time = gf_tvdiff(&begin, &end); throughput = total_blks / time; gf_log("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs " @@ -685,7 +685,7 @@ glusterfs_volume_top_perf(const char *brick_path, dict_t *dict, goto out; } - time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec); + time = gf_tvdiff(&begin, &end); throughput = total_blks / time; gf_log("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs " diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index f36ce2b0ffc..dae41f33fef 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -1687,6 +1687,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) INIT_LIST_HEAD(&cmd_args->xlator_options); INIT_LIST_HEAD(&cmd_args->volfile_servers); + ctx->pxl_count = 0; + pthread_mutex_init(&ctx->fd_lock, NULL); + pthread_cond_init(&ctx->fd_cond, NULL); + INIT_LIST_HEAD(&ctx->janitor_fds); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; @@ -2079,8 +2083,8 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx) if (((ret == 0) && (S_ISREG(stbuf.st_mode) || S_ISLNK(stbuf.st_mode))) || (ret == -1)) { - /* Have separate logfile per run */ - gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT); + /* Have separate logfile per run. */ + gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT); sprintf(tmp_logfile, "%s.%s.%d", cmd_args->log_file, timestr, getpid()); diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index ef53dfc0ccb..bf4b47f8760 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -1055,6 +1055,10 @@ glfsh_set_heal_options(glfs_t *fs, gf_xl_afr_op_t heal_op) if (ret) goto out; + ret = glfs_set_xlator_option(fs, "*-replicate-*", "halo-enabled", "off"); + if (ret) + goto out; + if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) && (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) && (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME)) diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index c4b0e9bab08..385e8ef4600 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -12,7 +12,8 @@ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \ -I$(CONTRIBDIR)/xxhash -libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) +libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \ + $(URCU_LIBS) $(URCU_CDS_LIBS) libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \ -export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index c37c45449f2..682cbf28055 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -577,8 +577,14 @@ struct dnscache * gf_dnscache_init(time_t ttl) { struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache); - if (cache) { - cache->cache_dict = NULL; + if (!cache) + return NULL; + + cache->cache_dict = dict_new(); + if (!cache->cache_dict) { + GF_FREE(cache); + cache = NULL; + } else { cache->ttl = ttl; } @@ -586,6 +592,20 @@ gf_dnscache_init(time_t ttl) } /** + * gf_dnscache_deinit -- cleanup resources used by struct dnscache + */ +void +gf_dnscache_deinit(struct dnscache *cache) +{ + if (!cache) { + gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL"); + return; + } + dict_unref(cache->cache_dict); + GF_FREE(cache); +} + +/** * gf_dnscache_entry_init -- Initialize a dnscache entry * * @return: SUCCESS: Pointer to an allocated dnscache entry struct @@ -633,12 +653,6 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (!dnscache) goto out; - if (!dnscache->cache_dict) { - dnscache->cache_dict = dict_new(); - if (!dnscache->cache_dict) { - goto out; - } - } cache = dnscache->cache_dict; /* Quick cache lookup to see if we already hold it */ @@ -646,7 +660,7 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache) if (entrydata) { dnsentry = (struct dnscache_entry *)entrydata->data; /* First check the TTL & timestamp */ - if (time(NULL) - dnsentry->timestamp > dnscache->ttl) { + if (gf_time() - dnsentry->timestamp > dnscache->ttl) { gf_dnscache_entry_deinit(dnsentry); entrydata->data = NULL; /* Mark this as 'null' so * dict_del () doesn't try free @@ -683,7 +697,7 @@ out: if (entry) { entry->fqdn = fqdn; entry->ip = gf_strdup(ip); - entry->timestamp = time(NULL); + entry->timestamp = gf_time(); entrydata = bin_to_data(entry, sizeof(*entry)); dict_set(cache, (char *)ip, entrydata); } @@ -935,7 +949,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx) { /* Dump the timestamp of the crash too, so the previous logs can be related */ - gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT); gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: "); gf_msg_plain_nomem(GF_LOG_ALERT, timestr); } @@ -3119,7 +3133,7 @@ get_mem_size() memsize = page_size * num_pages; #endif -#if defined GF_DARWIN_HOST_OS +#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__ size_t len = sizeof(memsize); int name[] = {CTL_HW, HW_PHYSMEM}; @@ -4133,6 +4147,14 @@ gf_skip_header_section(int fd, int header_len) gf_boolean_t gf_is_pid_running(int pid) { +#ifdef __FreeBSD__ + int ret = -1; + + ret = sys_kill(pid, 0); + if (ret < 0) { + return _gf_false; + } +#else char fname[32] = { 0, }; @@ -4146,6 +4168,7 @@ gf_is_pid_running(int pid) } sys_close(fd); +#endif return _gf_true; } diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c index 4a001c29209..3d890b04ec9 100644 --- a/libglusterfs/src/ctx.c +++ b/libglusterfs/src/ctx.c @@ -37,8 +37,12 @@ glusterfs_ctx_new() ctx->log.loglevel = DEFAULT_LOG_LEVEL; -#ifdef RUN_WITH_VALGRIND - ctx->cmd_args.valgrind = _gf_true; +#if defined(RUN_WITH_MEMCHECK) + ctx->cmd_args.vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + ctx->cmd_args.vgtool = _gf_drd; +#else + ctx->cmd_args.vgtool = _gf_none; #endif /* lock is never destroyed! */ diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index 917737a943e..1d9be9217a6 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -1495,32 +1495,13 @@ fail: * -val error, val = errno */ -int -dict_get_with_ref(dict_t *this, char *key, data_t **data) -{ - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - return -EINVAL; - } - - return dict_get_with_refn(this, key, strlen(key), data); -} - -int +static int dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) { data_pair_t *pair = NULL; int ret = -ENOENT; uint32_t hash; - if (!this || !key || !data) { - gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, - "dict OR key (%s) is NULL", key); - ret = -EINVAL; - goto err; - } - hash = (uint32_t)XXH64(key, keylen, 0); LOCK(&this->lock); @@ -1533,10 +1514,22 @@ dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data) } } UNLOCK(&this->lock); -err: + return ret; } +int +dict_get_with_ref(dict_t *this, char *key, data_t **data) +{ + if (!this || !key || !data) { + gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, + "dict OR key (%s) is NULL", key); + return -EINVAL; + } + + return dict_get_with_refn(this, key, strlen(key), data); +} + static int data_to_ptr_common(data_t *data, void **val) { @@ -1710,7 +1703,7 @@ dict_get_int8(dict_t *this, char *key, int8_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1756,7 +1749,7 @@ dict_get_int16(dict_t *this, char *key, int16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1828,7 +1821,7 @@ dict_get_int32(dict_t *this, char *key, int32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1893,7 +1886,7 @@ dict_get_int64(dict_t *this, char *key, int64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1938,7 +1931,7 @@ dict_get_uint16(dict_t *this, char *key, uint16_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -1983,7 +1976,7 @@ dict_get_uint32(dict_t *this, char *key, uint32_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2028,7 +2021,7 @@ dict_get_uint64(dict_t *this, char *key, uint64_t *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2251,7 +2244,7 @@ dict_get_double(dict_t *this, char *key, double *val) data_t *data = NULL; int ret = 0; - if (!this || !key || !val) { + if (!val) { ret = -EINVAL; goto err; } @@ -2334,7 +2327,7 @@ dict_get_ptr(dict_t *this, char *key, void **ptr) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2364,7 +2357,7 @@ dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len) data_t *data = NULL; int ret = 0; - if (!this || !key || !ptr) { + if (!ptr) { ret = -EINVAL; goto err; } @@ -2422,7 +2415,7 @@ dict_get_str(dict_t *this, char *key, char **str) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !str) { + if (!str) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2596,7 +2589,7 @@ dict_get_bin(dict_t *this, char *key, void **bin) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !bin) { + if (!bin) { goto err; } @@ -2699,7 +2692,7 @@ dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !gfid) { + if (!gfid) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2732,7 +2725,7 @@ dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !mdata) { + if (!mdata) { goto err; } ret = dict_get_with_ref(this, key, &data); @@ -2770,7 +2763,7 @@ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt) data_t *data = NULL; int ret = -EINVAL; - if (!this || !key || !iatt) { + if (!iatt) { goto err; } ret = dict_get_with_ref(this, key, &data); diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c index 235128b6044..402c253ca25 100644 --- a/libglusterfs/src/event.c +++ b/libglusterfs/src/event.c @@ -17,6 +17,7 @@ #include <string.h> #include "glusterfs/gf-event.h" +#include "glusterfs/timespec.h" #include "glusterfs/common-utils.h" #include "glusterfs/libglusterfs-messages.h" #include "glusterfs/syscall.h" @@ -266,7 +267,7 @@ gf_event_dispatch_destroy(struct event_pool *event_pool) if (sys_write(fd[1], "dummy", 6) == -1) { break; } - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex, &sleep_till); diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c index 6d1e3836477..33157549897 100644 --- a/libglusterfs/src/events.c +++ b/libglusterfs/src/events.c @@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) char *host = NULL; struct addrinfo hints; struct addrinfo *result = NULL; + struct addrinfo *iter_result_ptr = NULL; xlator_t *this = THIS; char *volfile_server_transport = NULL; @@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } - /* Initialize UDP socket */ - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - ret = EVENT_ERROR_SOCKET; - goto out; - } - if (ctx) { volfile_server_transport = ctx->cmd_args.volfile_server_transport; } @@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) } /* host = NULL returns localhost */ - host = NULL; if (ctx && ctx->cmd_args.volfile_server && (strcmp(volfile_server_transport, "unix"))) { /* If it is client code then volfile_server is set @@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } + // iterate over the result and break when socket creation is success. + for (iter_result_ptr = result; iter_result_ptr != NULL; + iter_result_ptr = iter_result_ptr->ai_next) { + sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, + iter_result_ptr->ai_protocol); + if (sock != -1) { + break; + } + } + /* + * If none of the addrinfo structures lead to a successful socket + * creation, socket creation has failed. + */ + if (sock < 0) { + ret = EVENT_ERROR_SOCKET; + goto out; + } + va_start(arguments, fmt); ret = gf_vasprintf(&msg, fmt, arguments); va_end(arguments); @@ -93,7 +104,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) goto out; } - ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg); + ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg); GF_FREE(msg); if (ret <= 0) { ret = EVENT_ERROR_MSG_FORMAT; diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c index 40fcffbb35e..64a93802f76 100644 --- a/libglusterfs/src/gidcache.c +++ b/libglusterfs/src/gidcache.c @@ -10,6 +10,7 @@ #include "glusterfs/gidcache.h" #include "glusterfs/mem-pool.h" +#include "glusterfs/common-utils.h" /* * We treat this as a very simple set-associative LRU cache, with entries aged @@ -64,7 +65,7 @@ gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid) time_t now; const gid_list_t *agl; - now = time(NULL); + now = gf_time(); LOCK(&cache->gc_lock); bucket = id % cache->gc_nbuckets; agl = BUCKET_START(cache->gc_cache, bucket); @@ -132,7 +133,7 @@ gid_cache_add(gid_cache_t *cache, gid_list_t *gl) if (!cache->gc_max_age) return 0; - now = time(NULL); + now = gf_time(); LOCK(&cache->gc_lock); /* diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h index f05bda1f537..f297fdab5c9 100644 --- a/libglusterfs/src/glusterfs/common-utils.h +++ b/libglusterfs/src/glusterfs/common-utils.h @@ -255,6 +255,8 @@ list_node_del(struct list_node *node); struct dnscache * gf_dnscache_init(time_t ttl); +void +gf_dnscache_deinit(struct dnscache *cache); struct dnscache_entry * gf_dnscache_entry_init(void); void @@ -1213,4 +1215,42 @@ gf_syncfs(int fd); int gf_nanosleep(uint64_t nsec); +static inline time_t +gf_time(void) +{ + return time(NULL); +} + +/* Return delta value in microseconds. */ + +static inline double +gf_tvdiff(struct timeval *start, struct timeval *end) +{ + struct timeval t; + + if (start->tv_usec > end->tv_usec) + t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000; + else + t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec; + + return (double)(t.tv_sec - start->tv_sec) * 1e6 + + (double)(t.tv_usec - start->tv_usec); +} + +/* Return delta value in nanoseconds. */ + +static inline double +gf_tsdiff(struct timespec *start, struct timespec *end) +{ + struct timespec t; + + if (start->tv_nsec > end->tv_nsec) + t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000; + else + t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec; + + return (double)(t.tv_sec - start->tv_sec) * 1e9 + + (double)(t.tv_nsec - start->tv_nsec); +} + #endif /* _COMMON_UTILS_H */ diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h index d4bf9b4f1a7..d0467c6dfb6 100644 --- a/libglusterfs/src/glusterfs/dict.h +++ b/libglusterfs/src/glusterfs/dict.h @@ -25,9 +25,6 @@ typedef struct _data_pair data_pair_t; #define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value) -#define dict_get_with_ref_sizen(this, key, value) \ - dict_get_with_refn(this, key, SLEN(key), value) - #define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key)) #define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key)) @@ -138,6 +135,7 @@ int32_t dict_set(dict_t *this, char *key, data_t *value); int32_t dict_setn(dict_t *this, char *key, const int keylen, data_t *value); + /* function to set a new key/value pair (without checking for duplicate) */ int32_t dict_add(dict_t *this, char *key, data_t *value); @@ -145,8 +143,6 @@ int32_t dict_addn(dict_t *this, char *key, const int keylen, data_t *value); int dict_get_with_ref(dict_t *this, char *key, data_t **data); -int -dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data); data_t * dict_get(dict_t *this, char *key); data_t * diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index e4dfda8d01a..e6425618b7f 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -466,6 +466,8 @@ typedef struct _server_cmdline server_cmdline_t; #define GF_OPTION_DISABLE _gf_false #define GF_OPTION_DEFERRED 2 +typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool; + struct _cmd_args { /* basic options */ char *volfile_server; @@ -558,7 +560,8 @@ struct _cmd_args { /* Run this process with valgrind? Might want to prevent calling * functions that prevent valgrind from working correctly, like * dlclose(). */ - int valgrind; + gf_valgrind_tool vgtool; + int localtime_logging; /* For the subdir mount */ @@ -730,6 +733,13 @@ struct _glusterfs_ctx { } stats; struct list_head volfile_list; + /* Add members to manage janitor threads for cleanup fd */ + struct list_head janitor_fds; + pthread_cond_t fd_cond; + pthread_mutex_t fd_lock; + pthread_t janitor; + /* The variable is use to save total posix xlator count */ + uint32_t pxl_count; char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ }; diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h index ed47b1f0cbc..4d601bbcbd6 100644 --- a/libglusterfs/src/glusterfs/latency.h +++ b/libglusterfs/src/glusterfs/latency.h @@ -11,13 +11,23 @@ #ifndef __LATENCY_H__ #define __LATENCY_H__ -#include "glusterfs/glusterfs.h" +#include <inttypes.h> +#include <time.h> -typedef struct fop_latency { - double min; /* min time for the call (microseconds) */ - double max; /* max time for the call (microseconds) */ - double total; /* total time (microseconds) */ +typedef struct _gf_latency { + uint64_t min; /* min time for the call (nanoseconds) */ + uint64_t max; /* max time for the call (nanoseconds) */ + uint64_t total; /* total time (nanoseconds) */ uint64_t count; -} fop_latency_t; +} gf_latency_t; +gf_latency_t * +gf_latency_new(size_t n); + +void +gf_latency_reset(gf_latency_t *lat); + +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end); #endif /* __LATENCY_H__ */ diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h index 0fd1214e27d..e5b3276d047 100644 --- a/libglusterfs/src/glusterfs/mem-pool.h +++ b/libglusterfs/src/glusterfs/mem-pool.h @@ -202,6 +202,24 @@ out: return dup_mem; } +#ifdef GF_DISABLE_MEMPOOL + +/* No-op memory pool enough to fit current API without massive redesign. */ + +struct mem_pool { + unsigned long sizeof_type; +}; + +#define mem_pools_init() \ + do { \ + } while (0) +#define mem_pools_fini() \ + do { \ + } while (0) +#define mem_pool_thread_destructor(pool_list) (void)pool_list + +#else /* !GF_DISABLE_MEMPOOL */ + /* kind of 'header' for the actual mem_pool_shared structure, this might make * it possible to dump some more details in a statedump */ struct mem_pool { @@ -210,10 +228,10 @@ struct mem_pool { unsigned long count; /* requested pool size (unused) */ char *name; char *xl_name; - gf_atomic_t active; /* current allocations */ + gf_atomic_t active; /* current allocations */ #ifdef DEBUG - gf_atomic_t hit; /* number of allocations served from pt_pool */ - gf_atomic_t miss; /* number of std allocs due to miss */ + gf_atomic_t hit; /* number of allocations served from pt_pool */ + gf_atomic_t miss; /* number of std allocs due to miss */ #endif struct list_head owner; /* glusterfs_ctx_t->mempool_list */ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */ @@ -287,6 +305,10 @@ void mem_pools_init(void); /* start the pool_sweeper thread */ void mem_pools_fini(void); /* cleanup memory pools */ +void +mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + +#endif /* GF_DISABLE_MEMPOOL */ struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, @@ -309,9 +331,6 @@ void mem_pool_destroy(struct mem_pool *pool); void -mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); - -void gf_mem_acct_enable_set(void *ctx); #endif /* _MEM_POOL_H */ diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h index 36cf7820ad5..d45d5b68c91 100644 --- a/libglusterfs/src/glusterfs/mem-types.h +++ b/libglusterfs/src/glusterfs/mem-types.h @@ -133,6 +133,7 @@ enum gf_common_mem_types_ { gf_common_volfile_t, gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ gf_common_mt_server_cmdline_t, /* used only in one location */ + gf_common_mt_latency_t, gf_common_mt_end }; #endif diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h index 17585508a22..536a330d38b 100644 --- a/libglusterfs/src/glusterfs/stack.h +++ b/libglusterfs/src/glusterfs/stack.h @@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame, xlator_t *this, int32_t op_ret, int32_t op_errno, ...); +void +gf_frame_latency_update(call_frame_t *frame); + struct call_pool { union { struct list_head all_frames; @@ -149,8 +152,6 @@ struct _call_stack { } while (0); struct xlator_fops; -void -gf_update_latency(call_frame_t *frame); static inline void FRAME_DESTROY(call_frame_t *frame) @@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame) void *local = NULL; if (frame->root->ctx->measure_latency) - gf_update_latency(frame); + gf_frame_latency_update(frame); list_del_init(&frame->frames); if (frame->local) { @@ -429,6 +430,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) if (ngrps <= SMALL_GROUP_COUNT) { stack->groups = stack->groups_small; } else { + GF_FREE(stack->groups_large); stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), gf_common_mt_groups_t); if (!stack->groups_large) @@ -442,6 +444,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) } static inline int +call_stack_groups_capacity(call_stack_t *stack) +{ + return max(stack->ngrps, SMALL_GROUP_COUNT); +} + +static inline int call_frames_count(call_stack_t *call_stack) { call_frame_t *pos; diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h index 89d04f94587..ce082706bdf 100644 --- a/libglusterfs/src/glusterfs/statedump.h +++ b/libglusterfs/src/glusterfs/statedump.h @@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd); void gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd); +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat); #endif /* STATEDUMP_H */ diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h index c8be544e164..a1f70c7b840 100644 --- a/libglusterfs/src/glusterfs/store.h +++ b/libglusterfs/src/glusterfs/store.h @@ -95,7 +95,7 @@ int32_t gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value); int32_t -gf_store_iter_destroy(gf_store_iter_t *iter); +gf_store_iter_destroy(gf_store_iter_t **iter); char * gf_store_strerror(gf_store_op_errno_t op_errno); diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h index 91e921aea50..b6d3ab4f2ad 100644 --- a/libglusterfs/src/glusterfs/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -266,4 +266,13 @@ ssize_t sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, size_t len, unsigned int flags); +int +sys_kill(pid_t pid, int sig); + +#ifdef __FreeBSD__ +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen); +#endif + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 23004ab2245..4fd3abdaeff 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -805,7 +805,7 @@ struct _xlator { struct { /* for latency measurement */ - fop_latency_t latencies[GF_FOP_MAXVALUE]; + gf_latency_t latencies[GF_FOP_MAXVALUE]; /* for latency measurement */ fop_metrics_t metrics[GF_FOP_MAXVALUE]; diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c index 15b397c3799..ce4b0e8255d 100644 --- a/libglusterfs/src/latency.c +++ b/libglusterfs/src/latency.c @@ -16,35 +16,32 @@ #include "glusterfs/glusterfs.h" #include "glusterfs/statedump.h" -void -gf_update_latency(call_frame_t *frame) +gf_latency_t * +gf_latency_new(size_t n) { - double elapsed; - struct timespec *begin, *end; - - fop_latency_t *lat; - - begin = &frame->begin; - end = &frame->end; + int i = 0; + gf_latency_t *lat = NULL; - if (!(begin->tv_sec && end->tv_sec)) - goto out; + lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t); + if (!lat) + return NULL; - elapsed = (end->tv_sec - begin->tv_sec) * 1e9 + - (end->tv_nsec - begin->tv_nsec); + for (i = 0; i < n; i++) { + gf_latency_reset(lat + i); + } + return lat; +} - if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { - gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", - frame->op); +void +gf_latency_update(gf_latency_t *lat, struct timespec *begin, + struct timespec *end) +{ + if (!(begin->tv_sec && end->tv_sec)) { + /*Measure latency might have been enabled/disabled during the op*/ return; } - /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros - set it right anyways for those frames */ - if (!frame->op) - frame->op = frame->root->op; - - lat = &frame->this->stats.interval.latencies[frame->op]; + double elapsed = gf_tsdiff(begin, end); if (lat->max < elapsed) lat->max = elapsed; @@ -54,42 +51,34 @@ gf_update_latency(call_frame_t *frame) lat->total += elapsed; lat->count++; -out: - return; } void -gf_proc_dump_latency_info(xlator_t *xl) +gf_latency_reset(gf_latency_t *lat) { - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i; - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); - gf_proc_dump_add_section("%s", key_prefix); - - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); - - fop_latency_t *lat = &xl->stats.interval.latencies[i]; + if (!lat) + return; + memset(lat, 0, sizeof(*lat)); + lat->min = ULLONG_MAX; + /* make sure 'min' is set to high value, so it would be + properly set later */ +} - /* Doesn't make sense to continue if there are no fops - came in the given interval */ - if (!lat->count) - continue; +void +gf_frame_latency_update(call_frame_t *frame) +{ + gf_latency_t *lat; + /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros + set it right anyways for those frames */ + if (!frame->op) + frame->op = frame->root->op; - gf_proc_dump_write( - key, "AVG:%.03f,CNT:%" PRId64 ",TOTAL:%.03f,MIN:%.03f,MAX:%.03f", - (lat->total / lat->count), lat->count, lat->total, lat->min, - lat->max); + if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) { + gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d", + frame->op); + return; } - memset(xl->stats.interval.latencies, 0, - sizeof(xl->stats.interval.latencies)); - - /* make sure 'min' is set to high value, so it would be - properly set later */ - for (i = 0; i < GF_FOP_MAXVALUE; i++) { - xl->stats.interval.latencies[i].min = 0xffffffff; - } + lat = &frame->this->stats.interval.latencies[frame->op]; + gf_latency_update(lat, &frame->begin, &frame->end); } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 00a6e484df6..5f18cd56cbe 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -585,6 +585,7 @@ gf_dirent_free gf_dirent_orig_offset gf_dm_hashfn gf_dnscache_init +gf_dnscache_deinit gf_errno_to_error gf_error_to_errno _gf_event @@ -1080,6 +1081,8 @@ sys_write sys_writev sys_socket sys_accept +sys_kill +sys_sysctl tbf_init tbf_throttle timespec_now @@ -1182,4 +1185,9 @@ xlator_is_cleanup_starting gf_nanosleep gf_syncfs graph_total_client_xlator -get_xattrs_to_heal
\ No newline at end of file +get_xattrs_to_heal +gf_latency_statedump_and_reset +gf_latency_new +gf_latency_reset +gf_latency_update +gf_frame_latency_update diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index 1a87d277cc9..2d5a12b0a00 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -362,6 +362,30 @@ free: FREE(ptr); } +#if defined(GF_DISABLE_MEMPOOL) + +struct mem_pool * +mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + unsigned long count, char *name) +{ + struct mem_pool *new; + + new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool); + if (!new) + return NULL; + + new->sizeof_type = sizeof_type; + return new; +} + +void +mem_pool_destroy(struct mem_pool *pool) +{ + GF_FREE(pool); +} + +#else /* !GF_DISABLE_MEMPOOL */ + static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; static struct list_head pool_threads; static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER; @@ -371,7 +395,6 @@ static size_t pool_list_size; static __thread per_thread_pool_list_t *thread_pool_list = NULL; -#if !defined(GF_DISABLE_MEMPOOL) #define N_COLD_LISTS 1024 #define POOL_SWEEP_SECS 30 @@ -617,21 +640,29 @@ mem_pools_fini(void) pthread_mutex_unlock(&init_mutex); } -#else void -mem_pools_init(void) -{ -} -void -mem_pools_fini(void) -{ -} -void -mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) +mem_pool_destroy(struct mem_pool *pool) { -} + if (!pool) + return; -#endif + /* remove this pool from the owner (glusterfs_ctx_t) */ + LOCK(&pool->ctx->lock); + { + list_del(&pool->owner); + } + UNLOCK(&pool->ctx->lock); + + /* free this pool, but keep the mem_pool_shared */ + GF_FREE(pool); + + /* + * Pools are now permanent, so the mem_pool->pool is kept around. All + * of the objects *in* the pool will eventually be freed via the + * pool-sweeper thread, and this way we don't have to add a lot of + * reference-counting complexity. + */ +} struct mem_pool * mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, @@ -700,21 +731,6 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, return new; } -void * -mem_get0(struct mem_pool *mem_pool) -{ - void *ptr = mem_get(mem_pool); - if (ptr) { -#if defined(GF_DISABLE_MEMPOOL) - memset(ptr, 0, mem_pool->sizeof_type); -#else - memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); -#endif - } - - return ptr; -} - per_thread_pool_list_t * mem_get_pool_list(void) { @@ -823,6 +839,23 @@ mem_get_from_pool(struct mem_pool *mem_pool) return retval; } +#endif /* GF_DISABLE_MEMPOOL */ + +void * +mem_get0(struct mem_pool *mem_pool) +{ + void *ptr = mem_get(mem_pool); + if (ptr) { +#if defined(GF_DISABLE_MEMPOOL) + memset(ptr, 0, mem_pool->sizeof_type); +#else + memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two)); +#endif + } + + return ptr; +} + void * mem_get(struct mem_pool *mem_pool) { @@ -897,27 +930,3 @@ mem_put(void *ptr) } #endif /* GF_DISABLE_MEMPOOL */ } - -void -mem_pool_destroy(struct mem_pool *pool) -{ - if (!pool) - return; - - /* remove this pool from the owner (glusterfs_ctx_t) */ - LOCK(&pool->ctx->lock); - { - list_del(&pool->owner); - } - UNLOCK(&pool->ctx->lock); - - /* free this pool, but keep the mem_pool_shared */ - GF_FREE(pool); - - /* - * Pools are now permanent, so the mem_pool->pool is kept around. All - * of the objects *in* the pool will eventually be freed via the - * pool-sweeper thread, and this way we don't have to add a lot of - * reference-counting complexity. - */ -} diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c index 45e3d776903..fbb68dc8622 100644 --- a/libglusterfs/src/monitoring.c +++ b/libglusterfs/src/monitoring.c @@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd) dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name, gf_fop_list[index], cbk); } - if (xl->stats.interval.latencies[index].count != 0.0) { + if (xl->stats.interval.latencies[index].count != 0) { dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name, gf_fop_list[index], - (xl->stats.interval.latencies[index].total / + (((double)xl->stats.interval.latencies[index].total) / xl->stats.interval.latencies[index].count)); - dprintf(fd, "%s.interval.%s.max %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].max); - dprintf(fd, "%s.interval.%s.min %lf\n", xl->name, + dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name, gf_fop_list[index], xl->stats.interval.latencies[index].min); } diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c index 655317f3ef1..65f0eb5c7f3 100644 --- a/libglusterfs/src/statedump.c +++ b/libglusterfs/src/statedump.c @@ -199,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...) return ret; } +void +gf_latency_statedump_and_reset(char *key, gf_latency_t *lat) +{ + /* Doesn't make sense to continue if there are no fops + came in the given interval */ + if (!lat || !lat->count) + return; + gf_proc_dump_write(key, + "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64 + " MAX:%" PRIu64, + (((double)lat->total) / lat->count), lat->count, + lat->total, lat->min, lat->max); + gf_latency_reset(lat); +} + +void +gf_proc_dump_xl_latency_info(xlator_t *xl) +{ + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name); + gf_proc_dump_add_section("%s", key_prefix); + + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]); + + gf_latency_t *lat = &xl->stats.interval.latencies[i]; + + gf_latency_statedump_and_reset(key, lat); + } +} + static void gf_proc_dump_xlator_mem_info(xlator_t *xl) { @@ -349,26 +383,13 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict) void gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) { +#ifdef GF_DISABLE_MEMPOOL + gf_proc_dump_write("built with --disable-mempool", " so no memory pools"); +#else struct mem_pool *pool = NULL; gf_proc_dump_add_section("mempool"); -#if defined(OLD_MEM_POOLS) - list_for_each_entry(pool, &ctx->mempool_list, global_list) - { - gf_proc_dump_write("-----", "-----"); - gf_proc_dump_write("pool-name", "%s", pool->name); - gf_proc_dump_write("hot-count", "%d", pool->hot_count); - gf_proc_dump_write("cold-count", "%d", pool->cold_count); - gf_proc_dump_write("padded_sizeof", "%lu", pool->padded_sizeof_type); - gf_proc_dump_write("alloc-count", "%" PRIu64, pool->alloc_count); - gf_proc_dump_write("max-alloc", "%d", pool->max_alloc); - - gf_proc_dump_write("pool-misses", "%" PRIu64, pool->pool_misses); - gf_proc_dump_write("cur-stdalloc", "%d", pool->curr_stdalloc); - gf_proc_dump_write("max-stdalloc", "%d", pool->max_stdalloc); - } -#else LOCK(&ctx->lock); { list_for_each_entry(pool, &ctx->mempool_list, owner) @@ -388,15 +409,13 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx) } } UNLOCK(&ctx->lock); - - /* TODO: details of (struct mem_pool_shared) pool->pool */ -#endif +#endif /* GF_DISABLE_MEMPOOL */ } void gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) { -#if defined(OLD_MEM_POOLS) +#ifndef GF_DISABLE_MEMPOOL struct mem_pool *pool = NULL; char key[GF_DUMP_MAX_BUF_LEN] = { 0, @@ -407,51 +426,47 @@ gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict) if (!ctx || !dict) return; - list_for_each_entry(pool, &ctx->mempool_list, global_list) + LOCK(&ctx->lock); { - snprintf(key, sizeof(key), "pool%d.name", count); - ret = dict_set_str(dict, key, pool->name); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.hotcount", count); - ret = dict_set_int32(dict, key, pool->hot_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.coldcount", count); - ret = dict_set_int32(dict, key, pool->cold_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.paddedsizeof", count); - ret = dict_set_uint64(dict, key, pool->padded_sizeof_type); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.alloccount", count); - ret = dict_set_uint64(dict, key, pool->alloc_count); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max_alloc", count); - ret = dict_set_int32(dict, key, pool->max_alloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.max-stdalloc", count); - ret = dict_set_int32(dict, key, pool->max_stdalloc); - if (ret) - return; - - snprintf(key, sizeof(key), "pool%d.pool-misses", count); - ret = dict_set_uint64(dict, key, pool->pool_misses); - if (ret) - return; - count++; + list_for_each_entry(pool, &ctx->mempool_list, owner) + { + int64_t active = GF_ATOMIC_GET(pool->active); + + snprintf(key, sizeof(key), "pool%d.name", count); + ret = dict_set_str(dict, key, pool->name); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.active-count", count); + ret = dict_set_uint64(dict, key, active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.sizeof-type", count); + ret = dict_set_uint64(dict, key, pool->sizeof_type); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.padded-sizeof", count); + ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.size", count); + ret = dict_set_uint64(dict, key, + (1 << pool->pool->power_of_two) * active); + if (ret) + goto out; + + snprintf(key, sizeof(key), "pool%d.shared-pool", count); + ret = dict_set_static_ptr(dict, key, pool->pool); + if (ret) + goto out; + } } - ret = dict_set_int32(dict, "mempool-count", count); -#endif +out: + UNLOCK(&ctx->lock); +#endif /* !GF_DISABLE_MEMPOOL */ } void @@ -486,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav) return; if (ctx->measure_latency) - gf_proc_dump_latency_info(trav); + gf_proc_dump_xl_latency_info(trav); gf_proc_dump_xlator_mem_info(trav); @@ -843,7 +858,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) ? dump_options.dump_path : ((ctx->statedump_path != NULL) ? ctx->statedump_path : DEFAULT_VAR_RUN_DIRECTORY)), - brick_name, getpid(), (uint64_t)time(NULL)); + brick_name, getpid(), (uint64_t)gf_time()); if ((ret < 0) || (ret >= sizeof(path))) { goto out; } @@ -1030,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd) { gf_dump_strfd = strfd; - gf_proc_dump_latency_info(this); + gf_proc_dump_xl_latency_info(this); gf_dump_strfd = NULL; } diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c index 74cf5459c3c..5c316b9291a 100644 --- a/libglusterfs/src/store.c +++ b/libglusterfs/src/store.c @@ -649,24 +649,24 @@ out: } int32_t -gf_store_iter_destroy(gf_store_iter_t *iter) +gf_store_iter_destroy(gf_store_iter_t **iter) { int32_t ret = -1; - if (!iter) + if (!(*iter)) return 0; /* gf_store_iter_new will not return a valid iter object with iter->file * being NULL*/ - ret = fclose(iter->file); + ret = fclose((*iter)->file); if (ret) gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "Unable" " to close file: %s, ret: %d", - iter->filepath, ret); + (*iter)->filepath, ret); - GF_FREE(iter); - iter = NULL; + GF_FREE(*iter); + *iter = NULL; return ret; } diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 95bea675379..df20cec559f 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -593,7 +593,7 @@ syncenv_task(struct syncproc *proc) env->procs_idle++; - sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; + sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME; ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); env->procs_idle--; diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index 007e3e84eea..04400f98b6c 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -13,6 +13,10 @@ #include "glusterfs/mem-pool.h" #include "glusterfs/libglusterfs-messages.h" +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <signal.h> +#endif #include <sys/types.h> #include <utime.h> #include <sys/time.h> @@ -506,7 +510,7 @@ sys_lsetxattr(const char *path, const char *name, const void *value, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -624,7 +628,7 @@ sys_fsetxattr(int filedes, const char *name, const void *value, size_t size, #endif #ifdef GF_BSD_HOST_OS - return FS_RET_CHECK0( + return FS_RET_CHECK( extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size), errno); #endif @@ -854,3 +858,19 @@ sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, #endif /* HAVE_COPY_FILE_RANGE_SYS */ #endif /* HAVE_COPY_FILE_RANGE */ } + +#ifdef __FreeBSD__ +int +sys_kill(pid_t pid, int sig) +{ + return FS_RET_CHECK0(kill(pid, sig), errno); +} + +int +sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp, + const void *newp, size_t newlen) +{ + return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen), + errno); +} +#endif diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 539da7f4716..9a2582d45d5 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -260,6 +260,7 @@ xlator_dynload_apis(xlator_t *xl) void *handle = NULL; volume_opt_list_t *vol_opt = NULL; xlator_api_t *xlapi = NULL; + int i = 0; handle = xl->dlhandle; @@ -357,6 +358,10 @@ xlator_dynload_apis(xlator_t *xl) memcpy(xl->op_version, xlapi->op_version, sizeof(uint32_t) * GF_MAX_RELEASES); + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + gf_latency_reset(&xl->stats.interval.latencies[i]); + } + ret = 0; out: return ret; @@ -821,7 +826,7 @@ xlator_members_free(xlator_t *xl) GF_FREE(xl->name); GF_FREE(xl->type); - if (!(xl->ctx && xl->ctx->cmd_args.valgrind) && xl->dlhandle) + if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle) dlclose(xl->dlhandle); if (xl->options) dict_unref(xl->options); @@ -4,7 +4,29 @@ # i.e. where we are interested in the result of a command, # we have to run the command in an if-statement. -ORIGIN=${GLUSTER_ORIGIN:-origin} +UPSTREAM=${GLUSTER_UPSTREAM} +if [ "x$UPSTREAM" -eq "x" ]; then + for rmt in $(git remote); do + rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }'); + if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then + UPSTREAM=$rmt + echo "Picked $rmt as upstream remote" + break + fi + done +fi + +USER_REPO=${GLUSTER_USER_REPO:-origin} +if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then + echo "When you submit patches, it should get submitted to your fork, not to upstream directly" + echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`" + echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`" + echo "" + echo "Exiting..." + exit 1 +fi + + while getopts "v" opt; do case $opt in @@ -18,7 +40,7 @@ done shift $((OPTIND-1)) -branch="master"; +branch="devel"; set_hooks_commit_msg() { @@ -50,21 +72,21 @@ is_num() backport_id_message() { echo "" - echo "This commit is to a non-master branch, and hence is treated as a backport." + echo "This commit is to a non-devel branch, and hence is treated as a backport." echo "" echo "For backports we would like to retain the same gerrit Change-Id across" echo "branches. On auto inspection it is found that a gerrit Change-Id is" - echo "missing, or the Change-Id is not found on your local master" + echo "missing, or the Change-Id is not found on your local devel branch" echo "" echo "This could mean a few things:" echo " 1. This is not a backport, hence choose Y on the prompt to proceed" - echo " 2. Your $ORIGIN master is not up to date, hence the script is unable" - echo " to find the corresponding Change-Id on master. Either choose N," + echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable" + echo " to find the corresponding Change-Id on devel. Either choose N," echo " 'git fetch', and try again, OR if you are sure you used the" echo " same Change-Id, choose Y at the prompt to proceed" echo " 3. You commented or removed the Change-Id in your commit message after" echo " cherry picking the commit. Choose N, fix the commit message to" - echo " use the same Change-Id as master (git commit --amend), resubmit" + echo " use the same Change-Id as 'devel' (git commit --amend), resubmit" echo "" } @@ -72,8 +94,8 @@ check_backport() { moveon='N' - # Backports are never made to master - if [ $branch = "master" ]; then + # Backports are never made to 'devel' + if [ $branch = "devel" ]; then return; fi @@ -86,22 +108,22 @@ check_backport() echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): " read moveon else - # Search master for the same change ID (rebase_changes has run, so we + # Search 'devel' for the same change ID (rebase_changes has run, so we # should never not find a Change-Id) - mchangeid=$(git log $ORIGIN/master --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}') + mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}') - # Check if we found the change ID on master, else throw a message to + # Check if we found the change ID on 'devel', else throw a message to # decide if we should continue. - # NOTE: If master was not rebased, we will not find the Change-ID and + # NOTE: If 'devel' was not rebased, we will not find the Change-ID and # could hit a false positive case here (or if someone checks out some - # other branch as master). + # other branch as 'devel'). if [ "${mchangeid}" = "${changeid}" ]; then moveon="Y" else backport_id_message; echo "Change-Id of commit: $changeid" - echo "Change-Id on master: $mchangeid" - echo -n "Did not find mentioned Change-Id on master for a possible backport. Continue (y/N): " + echo "Change-Id on devel: $mchangeid" + echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): " read moveon fi fi @@ -116,7 +138,7 @@ check_backport() rebase_changes() { - GIT_EDITOR=$0 git rebase -i $ORIGIN/$branch; + GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch; } @@ -212,7 +234,7 @@ EOF assert_diverge() { - git diff $ORIGIN/$branch..HEAD | grep -q .; + git diff $UPSTREAM/$branch..HEAD | grep -q .; } @@ -258,7 +280,7 @@ main() return; fi - git fetch $ORIGIN; + git fetch $UPSTREAM; rebase_changes; @@ -269,9 +291,9 @@ main() # see note above variable REFRE for regex elaboration reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}'); - # If this is a commit against master and does not have a github + # If this is a commit against 'devel' and does not have a github # issue reference. Warn the contributor that one of the 2 is required - if [ -z "${reference}" ] && [ $branch = "master" ]; then + if [ -z "${reference}" ] && [ $branch = "devel" ]; then warn_reference_missing; fi @@ -299,9 +321,9 @@ main() fi if [ -z "${reference}" ]; then - $drier git push $ORIGIN HEAD:refs/for/$branch/rfc; + $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s); else - $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference}; + $drier git push $USER_REPO HEAD:issue${reference}_${branch}; fi } diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym index 54d1be1112f..e026d80259b 100644 --- a/rpc/rpc-lib/src/libgfrpc.sym +++ b/rpc/rpc-lib/src/libgfrpc.sym @@ -65,3 +65,4 @@ rpc_transport_unix_options_build rpc_transport_unref rpc_clnt_mgmt_pmap_signout rpcsvc_autoscale_threads +rpcsvc_statedump diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 7275d7568b6..0cb5862e9a9 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -309,6 +309,7 @@ enum glusterd_mgmt_v3_procnum { GLUSTERD_MGMT_V3_PRE_VALIDATE, GLUSTERD_MGMT_V3_BRICK_OP, GLUSTERD_MGMT_V3_COMMIT, + GLUSTERD_MGMT_V3_POST_COMMIT, GLUSTERD_MGMT_V3_POST_VALIDATE, GLUSTERD_MGMT_V3_UNLOCK, GLUSTERD_MGMT_V3_MAXVALUE, diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.c b/rpc/rpc-lib/src/rpc-clnt-ping.c index 2298ef6394f..31f17841bea 100644 --- a/rpc/rpc-lib/src/rpc-clnt-ping.c +++ b/rpc/rpc-lib/src/rpc-clnt-ping.c @@ -122,7 +122,7 @@ rpc_clnt_ping_timer_expired(void *rpc_ptr) goto out; } - clock_gettime(CLOCK_REALTIME, ¤t); + timespec_now_realtime(¤t); pthread_mutex_lock(&conn->lock); { unref = rpc_clnt_remove_ping_timer_locked(rpc); diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 2b0ec295525..517037c4a5d 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -919,7 +919,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata, } case RPC_TRANSPORT_MSG_RECEIVED: { - clock_gettime(CLOCK_REALTIME, &conn->last_received); + timespec_now_realtime(&conn->last_received); pollin = data; if (pollin->is_reply) @@ -933,8 +933,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata, } case RPC_TRANSPORT_MSG_SENT: { - clock_gettime(CLOCK_REALTIME, &conn->last_sent); - + timespec_now_realtime(&conn->last_sent); ret = 0; break; } diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c index 4297af4609c..39910d481bf 100644 --- a/rpc/rpc-lib/src/rpcsvc.c +++ b/rpc/rpc-lib/src/rpcsvc.c @@ -13,6 +13,7 @@ #include <glusterfs/dict.h> #include <glusterfs/byte-order.h> #include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> #include "xdr-rpc.h" #include <glusterfs/iobuf.h> #include "xdr-common.h" @@ -341,14 +342,12 @@ rpcsvc_program_actor(rpcsvc_request_t *req) goto err; } + if (svc->xl->ctx->measure_latency) { + timespec_now(&req->begin); + } + req->ownthread = program->ownthread; req->synctask = program->synctask; - if (((req->procnum == GFS3_OP_RELEASE) || - (req->procnum == GFS3_OP_RELEASEDIR)) && - (program->prognum == GLUSTER_FOP_PROGRAM)) { - req->ownthread = _gf_false; - req->synctask = _gf_true; - } err = SUCCESS; gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", @@ -1496,10 +1495,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr, size_t hdrlen = 0; char new_iobref = 0; rpcsvc_drc_globals_t *drc = NULL; + gf_latency_t *lat = NULL; if ((!req) || (!req->trans)) return -1; + if (req->prog && req->begin.tv_sec) { + if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) { + timespec_now(&req->end); + lat = &req->prog->latencies[req->procnum]; + gf_latency_update(lat, &req->begin, &req->end); + } + } trans = req->trans; for (i = 0; i < hdrcount; i++) { @@ -1830,6 +1837,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr, iobref); } +void +rpcsvc_program_destroy(rpcsvc_program_t *program) +{ + if (program) { + GF_FREE(program->latencies); + GF_FREE(program); + } +} + int rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) { @@ -1887,8 +1903,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) ret = 0; out: - if (prog) - GF_FREE(prog); + rpcsvc_program_destroy(prog); if (ret == -1) { if (program) { @@ -2273,6 +2288,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program, } memcpy(newprog, program, sizeof(*program)); + newprog->latencies = gf_latency_new(program->numactors); + if (!newprog->latencies) { + rpcsvc_program_destroy(newprog); + goto out; + } INIT_LIST_HEAD(&newprog->program); pthread_mutexattr_init(&thr_attr); @@ -3218,6 +3238,48 @@ out: return ret; } +void +rpcsvc_program_dump(rpcsvc_program_t *prog) +{ + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i; + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname); + gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_build_key(key, key_prefix, "program-number"); + gf_proc_dump_write(key, "%d", prog->prognum); + + gf_proc_dump_build_key(key, key_prefix, "program-version"); + gf_proc_dump_write(key, "%d", prog->progver); + + strncat(key_prefix, ".latency", + sizeof(key_prefix) - strlen(key_prefix) - 1); + + for (i = 0; i < prog->numactors; i++) { + gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname); + gf_latency_statedump_and_reset(key, &prog->latencies[i]); + } +} + +void +rpcsvc_statedump(rpcsvc_t *svc) +{ + rpcsvc_program_t *prog = NULL; + int ret = 0; + ret = pthread_rwlock_tryrdlock(&svc->rpclock); + if (ret) + return; + { + list_for_each_entry(prog, &svc->programs, program) + { + rpcsvc_program_dump(prog); + } + } + pthread_rwlock_unlock(&svc->rpclock); +} + static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = { [GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0}, [GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0}, diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h index c370b330572..7b3030926c8 100644 --- a/rpc/rpc-lib/src/rpcsvc.h +++ b/rpc/rpc-lib/src/rpcsvc.h @@ -264,6 +264,8 @@ struct rpcsvc_request { gf_boolean_t ownthread; gf_boolean_t synctask; + struct timespec begin; /*req handling start time*/ + struct timespec end; /*req handling end time*/ }; #define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog)) @@ -419,6 +421,7 @@ struct rpcsvc_program { supported by the system. */ /* Program specific state handed to actors */ void *private; + gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/ /* This upcall is provided by the program during registration. * It is used to notify the program about events like connection being @@ -686,4 +689,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr); extern int rpcsvc_destroy(rpcsvc_t *svc); +void +rpcsvc_statedump(rpcsvc_t *svc); #endif diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index 65a41d93493..ed8b473be23 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2950,6 +2950,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, socket_dump_info(sa, priv->is_server, priv->use_ssl, priv->sock, this->name, "disconnecting from"); + /* Dump the SSL error stack to clear any errors that may otherwise + * resurface in the future. + */ + if (priv->use_ssl && priv->ssl_ssl) { + ssl_dump_error_stack(this->name); + } + /* Logging has happened already in earlier cases */ gf_log("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG), "EPOLLERR - disconnecting (sock:%d) (%s)", priv->sock, diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x index 02ebec26c01..b631dea3502 100644 --- a/rpc/xdr/src/glusterd1-xdr.x +++ b/rpc/xdr/src/glusterd1-xdr.x @@ -202,6 +202,21 @@ struct gd1_mgmt_v3_commit_rsp { string op_errstr<>; } ; +struct gd1_mgmt_v3_post_commit_req { + unsigned char uuid[16]; + int op; + opaque dict<>; +} ; + +struct gd1_mgmt_v3_post_commit_rsp { + unsigned char uuid[16]; + int op; + int op_ret; + int op_errno; + opaque dict<>; + string op_errstr<>; +} ; + struct gd1_mgmt_v3_post_val_req { unsigned char uuid[16]; int op; diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym index dd4ac8562bc..8fa0e0ddd8a 100644 --- a/rpc/xdr/src/libgfxdr.sym +++ b/rpc/xdr/src/libgfxdr.sym @@ -28,6 +28,8 @@ xdr_gd1_mgmt_v3_brick_op_req xdr_gd1_mgmt_v3_brick_op_rsp xdr_gd1_mgmt_v3_commit_req xdr_gd1_mgmt_v3_commit_rsp +xdr_gd1_mgmt_v3_post_commit_req +xdr_gd1_mgmt_v3_post_commit_rsp xdr_gd1_mgmt_v3_lock_req xdr_gd1_mgmt_v3_lock_rsp xdr_gd1_mgmt_v3_post_val_req diff --git a/run-tests.sh b/run-tests.sh index dc25d8cca31..e2a1655d8e0 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -328,6 +328,7 @@ function get_bug_list_for_disabled_test () function run_tests() { RES=0 + FLAKY='' FAILED='' TESTS_NEEDED_RETRY='' GENERATED_CORE='' @@ -349,14 +350,15 @@ function run_tests() timeout_cmd_exists="no" fi - for t in $(find ${regression_testsdir}/tests -name '*.t' \ - | LC_COLLATE=C sort) ; do + all_tests=($(find ${regression_testsdir}/tests -name '*.t' | sort)) + all_tests_cnt=${#all_tests[@]} + for t in "${all_tests[@]}" ; do old_cores=$(ls /*-*.core 2> /dev/null | wc -l) total_tests=$((total_tests+1)) if match $t "$@" ; then selected_tests=$((selected_tests+1)) echo - echo $section_separator$section_separator + echo $section_separator "(${total_tests} / ${all_tests_cnt})" $section_separator if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \ [[ $skip_bad_tests == "yes" ]] then @@ -432,9 +434,17 @@ function run_tests() TESTS_NEEDED_RETRY="${TESTS_NEEDED_RETRY}${t} " fi + + if [ ${TMP_RES} -ne 0 ] ; then - RES=${TMP_RES} - FAILED="${FAILED}${t} " + if [[ "$t" == *"tests/000-flaky/"* ]]; then + FLAKY="${FLAKY}${t} " + echo "FAILURE -> SUCCESS: Flaky test" + TMP_RES=0 + else + RES=${TMP_RES} + FAILED="${FAILED}${t} " + fi fi new_cores=$(ls /*-*.core 2> /dev/null | wc -l) @@ -469,8 +479,10 @@ function run_tests() echo "$key - ${ELAPSEDTIMEMAP["$key"]} second" done | sort -rn -k3 - # Output the errors into a file + # initialize the output file echo > "${result_output}" + + # Output the errors into a file if [ ${RES} -ne 0 ] ; then FAILED=$( echo ${FAILED} | tr ' ' '\n' | sort -u ) FAILED_COUNT=$( echo -n "${FAILED}" | grep -c '^' ) @@ -483,7 +495,13 @@ function run_tests() TESTS_NEEDED_RETRY=$( echo ${TESTS_NEEDED_RETRY} | tr ' ' '\n' | sort -u ) RETRY_COUNT=$( echo -n "${TESTS_NEEDED_RETRY}" | grep -c '^' ) if [ ${RETRY_COUNT} -ne 0 ] ; then - echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" + echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" >> "${result_output}" + fi + + FLAKY_TESTS_FAILED=$( echo ${FLAKY} | tr ' ' '\n' | sort -u ) + RETRY_COUNT=$( echo -n "${FLAKY_TESTS_FAILED}" | grep -c '^' ) + if [ ${RETRY_COUNT} -ne 0 ] ; then + echo -e "\n${RETRY_COUNT} flaky test(s) marked as success even though they failed \n${FLAKY_TESTS_FAILED}" >> "${result_output}" fi echo diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t index c268c125610..77d82a4996f 100644 --- a/tests/basic/afr/split-brain-favorite-child-policy.t +++ b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t @@ -1,8 +1,8 @@ #!/bin/bash #Test the split-brain resolution CLI commands. -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc cleanup; diff --git a/tests/basic/changelog/changelog-snapshot.t b/tests/000-flaky/basic_changelog_changelog-snapshot.t index 7742db48cdd..f6cd0b04d47 100644 --- a/tests/basic/changelog/changelog-snapshot.t +++ b/tests/000-flaky/basic_changelog_changelog-snapshot.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../snapshot.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../snapshot.rc cleanup; ROLLOVER_TIME=3 diff --git a/tests/basic/distribute/rebal-all-nodes-migrate.t b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t index acc4ffefecc..eb5d3305ac1 100644 --- a/tests/basic/distribute/rebal-all-nodes-migrate.t +++ b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t @@ -1,8 +1,8 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../cluster.rc -. $(dirname $0)/../../dht.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc +. $(dirname $0)/../dht.rc # Check if every single rebalance process migrated some files @@ -10,11 +10,9 @@ function cluster_rebal_all_nodes_migrated_files { val=0 a=$($CLI_1 volume rebalance $V0 status | grep "completed" | awk '{print $2}'); -# echo $a b=($a) for i in "${b[@]}" do -# echo "$i"; if [ "$i" -eq "0" ]; then echo "false"; val=1; diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t index 79f5825ae10..42808ce0c0e 100755..100644 --- a/tests/basic/ec/ec-quorum-count-partial-failure.t +++ b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc #This test checks that partial failure of fop results in main fop failure only cleanup; diff --git a/tests/basic/mount-nfs-auth.t b/tests/000-flaky/basic_mount-nfs-auth.t index 3d4a9cff00b..3d4a9cff00b 100755..100644 --- a/tests/basic/mount-nfs-auth.t +++ b/tests/000-flaky/basic_mount-nfs-auth.t diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t index dc9013061b0..5a88f97d726 100644 --- a/tests/bugs/core/multiplex-limit-issue-151.t +++ b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t @@ -1,8 +1,8 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../traps.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../traps.rc +. $(dirname $0)/../volume.rc function count_up_bricks { $CLI --xml volume status all | grep '<status>1' | wc -l diff --git a/tests/bugs/distribute/bug-1117851.t b/tests/000-flaky/bugs_distribute_bug-1117851.t index 62cb6b66ab4..5980bf2fd4b 100755..100644 --- a/tests/bugs/distribute/bug-1117851.t +++ b/tests/000-flaky/bugs_distribute_bug-1117851.t @@ -2,8 +2,8 @@ SCRIPT_TIMEOUT=250 -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc create_files () { for i in {1..1000}; do diff --git a/tests/bugs/distribute/bug-1122443.t b/tests/000-flaky/bugs_distribute_bug-1122443.t index 906be7072bd..abd37082b33 100644 --- a/tests/bugs/distribute/bug-1122443.t +++ b/tests/000-flaky/bugs_distribute_bug-1122443.t @@ -1,8 +1,8 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../dht.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../dht.rc make_files() { mkdir $1 && \ @@ -42,8 +42,8 @@ TEST glusterfs -s $H0 --volfile-id $V0 $M0 TEST make_files $M0/subdir # Get mtime before migration -BEFORE="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')" - +BEFORE="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')" +echo $BEFORE # Migrate brick TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1 TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 start @@ -51,9 +51,10 @@ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 commit # Get mtime after migration -EXPECT_WITHIN 5 RECONNECTED bug_1113050_workaround $M0/subdir/* -AFTER="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')" - +EXPECT_WITHIN 30 RECONNECTED bug_1113050_workaround $M0/subdir/symlink +sleep 3 +AFTER="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')" +echo $AFTER # Check if mtime is unchanged TEST [ "$AFTER" == "$BEFORE" ] diff --git a/tests/bugs/glusterd/bug-857330/common.rc b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc index d0aa4b1a640..bd122eff18c 100644 --- a/tests/bugs/glusterd/bug-857330/common.rc +++ b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc @@ -1,4 +1,4 @@ -. $(dirname $0)/../../../include.rc +. $(dirname $0)/../../include.rc UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}' diff --git a/tests/bugs/glusterd/bug-857330/normal.t b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t index ad0c8844fae..6c1cf54ec3c 100755 --- a/tests/bugs/glusterd/bug-857330/normal.t +++ b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t @@ -1,7 +1,7 @@ #!/bin/bash . $(dirname $0)/common.rc -. $(dirname $0)/../../../volume.rc +. $(dirname $0)/../../volume.rc cleanup; TEST glusterd @@ -14,7 +14,7 @@ TEST $CLI volume start $V0; TEST glusterfs -s $H0 --volfile-id=$V0 $M0; -TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \ +TEST $PYTHON $(dirname $0)/../../utils/create-files.py \ --multi -b 10 -d 10 -n 10 $M0; EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 diff --git a/tests/bugs/glusterd/bug-857330/xml.t b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t index 8383d2a0711..11785adacdb 100755 --- a/tests/bugs/glusterd/bug-857330/xml.t +++ b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t @@ -1,7 +1,7 @@ #!/bin/bash . $(dirname $0)/common.rc -. $(dirname $0)/../../../volume.rc +. $(dirname $0)/../../volume.rc cleanup; @@ -15,7 +15,7 @@ TEST $CLI volume start $V0; TEST glusterfs -s $H0 --volfile-id=$V0 $M0; -TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \ +TEST $PYTHON $(dirname $0)/../../utils/create-files.py \ --multi -b 10 -d 10 -n 10 $M0; EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/000-flaky/bugs_glusterd_quorum-value-check.t index c701f08bbd5..a431b8c4fd4 100755..100644 --- a/tests/bugs/glusterd/quorum-value-check.t +++ b/tests/000-flaky/bugs_glusterd_quorum-value-check.t @@ -1,7 +1,7 @@ #!/bin/bash -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc #G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/000-flaky/bugs_nfs_bug-1116503.t index dd3998df150..fc50021acc7 100644 --- a/tests/bugs/nfs/bug-1116503.t +++ b/tests/000-flaky/bugs_nfs_bug-1116503.t @@ -3,9 +3,9 @@ # Verify that mounting NFS over UDP (MOUNT service only) works. # -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../nfs.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../nfs.rc #G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST diff --git a/tests/features/lock-migration/lkmigration-set-option.t b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t index 4340438591f..1327ef3579f 100644 --- a/tests/features/lock-migration/lkmigration-set-option.t +++ b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t @@ -1,7 +1,7 @@ #!/bin/bash # Test to check -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc #Check lock-migration set option sanity cleanup; diff --git a/tests/afr.rc b/tests/afr.rc index 5fc7fa1898d..241789903ba 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -115,3 +115,9 @@ function afr_private_key_value() #xargs at the end will strip leading spaces grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs } + +function afr_anon_entry_count() +{ + local b=$1 + ls $b/.glusterfs-anonymous-inode* | wc -l +} diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t new file mode 100644 index 00000000000..896ba0c9b2c --- /dev/null +++ b/tests/basic/afr/afr-anon-inode-no-quorum.t @@ -0,0 +1,63 @@ +#!/bin/bash + +#Test that anon-inode entry is not cleaned up as long as there exists at least +#one valid entry +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST touch $M0/a $M0/b + +gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) +gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/a $M0/a-new +TEST mv $M0/b $M0/b-new + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! ls $M0/a +TEST ! ls $M0/b +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +#Make sure index heal doesn't happen after enabling heal +TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 +TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +#Allow time for a scan +sleep 5 +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) +TEST rm -f $M0/a-new +TEST stat $M0/b-new + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new + +cleanup diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t new file mode 100644 index 00000000000..f4cf37a2fa0 --- /dev/null +++ b/tests/basic/afr/afr-anon-inode.t @@ -0,0 +1,114 @@ +#!/bin/bash +#Tests that afr-anon-inode test cases work fine as expected +#These are cases where in entry-heal/name-heal we dont know entry for an inode +#so these inodes are kept in a special directory + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode no +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode yes +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST mkdir -p $M0/d1/b $M0/d2/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/d2/a $M0/d1 +TEST mv $M0/d1/b $M0/d2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/${V0}1/$anon_inode_name ]] +TEST [[ -d $B0/${V0}2/$anon_inode_name ]] +anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name + +TEST ! ls $M0/$anon_inode_name +EXPECT "^4$" echo $(ls -a $M0 | wc -l) + +#Test purging code path by shd +TEST $CLI volume heal $V0 disable +TEST mkdir $M0/l0 $M0/l1 $M0/l2 +TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file +TEST ln $M0/del-file $M0/del-file-link +TEST ln $M0/l0/file $M0/l1/file-link1 +TEST ln $M0/l0/file $M0/l2/file-link2 +TEST mkdir -p $M0/del-recursive-dir/d1 + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST rm -f $M0/del-file $M0/del-file-nolink +TEST rm -rf $M0/del-recursive-dir +TEST mv $M0/d1/a $M0/d2 +TEST mv $M0/l0/file $M0/l0/renamed-file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 + +nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) +link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) +dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) +rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) +rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) +TEST ! stat $M0/del-file +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST ! stat $M0/del-file-nolink +TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid +TEST ! stat $M0/del-recursive-dir +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +TEST ! stat $M0/d1/a +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST ! stat $M0/l0/file +TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +TEST ! stat $M0/l1/file-link1 +TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 +TEST ! stat $M0/l2/file-link2 +TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid + +#Simulate only anon-inodes present in all bricks +TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 + +#Test that shd doesn't cleanup anon-inodes when some bricks are down +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +$CLI volume heal $V0 +sleep 5 #Allow time for completion of one scan +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 + +#Test that rename indeed happened instead of rmdir/mkdir +renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) +EXPECT "$rename_dir_inum" echo $renamed_dir_inum +cleanup; diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t new file mode 100644 index 00000000000..7bb6ee14193 --- /dev/null +++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t @@ -0,0 +1,459 @@ +#!/bin/bash + +#This file checks if missing entry self-heal and entry self-heal are working +#as expected. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_file_type { + stat -c "%a:%F:%g:%t:%T:%u" $1 +} + +function diff_dirs { + diff <(ls $1 | sort) <(ls $2 | sort) +} + +function heal_status { + local f1_path="${1}/${3}" + local f2_path="${2}/${3}" + local insync="" + diff_dirs $f1_path $f2_path + if [ $? -eq 0 ]; + then + insync="Y" + else + insync="N" + fi + local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) + local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) + local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) + local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) + local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) + local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) + if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi + if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi + if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi + if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi + if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi + if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi + echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} +} + +function is_heal_done { + local zero_xattr="000000000000000000000000" + if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; + then + echo "Y" + else + echo "N" + fi +} + +function print_pending_heals { + local result=":" + for i in "$@"; + do + if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; + then + result="$result:$i" + fi + done +#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names + if [ $result == ":" ]; then result="~"; fi + echo $result +} + +zero_xattr="000000000000000000000000" +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.use-anonymous-inode off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 +cd $M0 +#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens +#spb is split-brain, fool is all fool + +#source_self_accusing means there exists source and a sink which self-accuses. +#This simulates failures where fops failed on the bricks without it going down. +#Something like EACCESS/EDQUOT etc + +TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing +TEST mkfifo source_deletions_heal/fifo +TEST mknod source_deletions_heal/block b 4 5 +TEST mknod source_deletions_heal/char c 1 5 +TEST touch source_deletions_heal/file +TEST ln -s source_deletions_heal/file source_deletions_heal/slink +TEST mkdir source_deletions_heal/dir1 +TEST mkdir source_deletions_heal/dir1/dir2 + +TEST mkfifo source_deletions_me/fifo +TEST mknod source_deletions_me/block b 4 5 +TEST mknod source_deletions_me/char c 1 5 +TEST touch source_deletions_me/file +TEST ln -s source_deletions_me/file source_deletions_me/slink +TEST mkdir source_deletions_me/dir1 +TEST mkdir source_deletions_me/dir1/dir2 + +TEST mkfifo source_self_accusing/fifo +TEST mknod source_self_accusing/block b 4 5 +TEST mknod source_self_accusing/char c 1 5 +TEST touch source_self_accusing/file +TEST ln -s source_self_accusing/file source_self_accusing/slink +TEST mkdir source_self_accusing/dir1 +TEST mkdir source_self_accusing/dir1/dir2 + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 +TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 +TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 +TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 + +#Test that the files are deleted +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/slink +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/slink +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/slink +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + + +TEST mkfifo source_creations_heal/fifo +TEST mknod source_creations_heal/block b 4 5 +TEST mknod source_creations_heal/char c 1 5 +TEST touch source_creations_heal/file +TEST ln -s source_creations_heal/file source_creations_heal/slink +TEST mkdir source_creations_heal/dir1 +TEST mkdir source_creations_heal/dir1/dir2 + +TEST mkfifo source_creations_me/fifo +TEST mknod source_creations_me/block b 4 5 +TEST mknod source_creations_me/char c 1 5 +TEST touch source_creations_me/file +TEST ln -s source_creations_me/file source_creations_me/slink +TEST mkdir source_creations_me/dir1 +TEST mkdir source_creations_me/dir1/dir2 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#simulate self-accusing for source_self_accusing +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +# Check if conservative merges happened correctly on _me_ dirs +TEST stat spb_me_heal/1 +TEST stat $B0/${V0}0/spb_me_heal/1 +TEST stat $B0/${V0}1/spb_me_heal/1 + +TEST stat spb_me_heal/0 +TEST stat $B0/${V0}0/spb_me_heal/0 +TEST stat $B0/${V0}1/spb_me_heal/0 + +TEST stat fool_me/1 +TEST stat $B0/${V0}0/fool_me/1 +TEST stat $B0/${V0}1/fool_me/1 + +TEST stat fool_me/0 +TEST stat $B0/${V0}0/fool_me/0 +TEST stat $B0/${V0}1/fool_me/0 + +TEST stat v1_fool_me/0 +TEST stat $B0/${V0}0/v1_fool_me/0 +TEST stat $B0/${V0}1/v1_fool_me/0 + +TEST stat v1_fool_me/1 +TEST stat $B0/${V0}0/v1_fool_me/1 +TEST stat $B0/${V0}1/v1_fool_me/1 + +TEST stat v1_dirty_me/0 +TEST stat $B0/${V0}0/v1_dirty_me/0 +TEST stat $B0/${V0}1/v1_dirty_me/0 + +#Check if files that have gfid-mismatches in _me_ are giving EIO +TEST ! stat spb_me/0 + +#Check if stale files are deleted on access +TEST ! stat source_deletions_me/fifo +TEST ! stat $B0/${V0}0/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat source_deletions_me/block +TEST ! stat $B0/${V0}0/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat source_deletions_me/char +TEST ! stat $B0/${V0}0/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 +TEST ! stat source_deletions_me/dir1 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 + +#Test if the files created as part of access are healed correctly +r=$(get_file_type source_creations_me/fifo) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo +TEST [ -p source_creations_me/fifo ] + +r=$(get_file_type source_creations_me/block) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block +TEST [ -b source_creations_me/block ] + +r=$(get_file_type source_creations_me/char) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char +TEST [ -c source_creations_me/char ] + +r=$(get_file_type source_creations_me/file) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file +TEST [ -f source_creations_me/file ] + +r=$(get_file_type source_creations_me/slink) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink +TEST [ -h source_creations_me/slink ] + +r=$(get_file_type source_creations_me/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 +TEST [ -d source_creations_me/dir1/dir2 ] + +r=$(get_file_type source_creations_me/dir1) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 +TEST [ -d source_creations_me/dir1 ] + +#Trigger heal and check _heal dirs are healed properly +#Trigger change in event generation number. That way inodes would get refreshed during lookup +TEST kill_brick $V0 $H0 $B0/${V0}1 +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +TEST stat spb_heal +TEST stat spb_me_heal +TEST stat fool_heal +TEST stat fool_me +TEST stat v1_fool_heal +TEST stat v1_fool_me +TEST stat source_deletions_heal +TEST stat source_deletions_me +TEST stat source_self_accusing +TEST stat source_creations_heal +TEST stat source_creations_me +TEST stat v1_dirty_heal +TEST stat v1_dirty_me +TEST $CLI volume stop $V0 +TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 + +TEST $CLI volume stop $V0; + +#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal +create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +$CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0; +EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing + +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me + +#Don't access the files/dirs from mount point as that may cause self-heals +# Check if conservative merges happened correctly on heal dirs +TEST stat $B0/${V0}0/spb_heal/1 +TEST stat $B0/${V0}1/spb_heal/1 + +TEST stat $B0/${V0}0/spb_heal/0 +TEST stat $B0/${V0}1/spb_heal/0 + +TEST stat $B0/${V0}0/fool_heal/1 +TEST stat $B0/${V0}1/fool_heal/1 + +TEST stat $B0/${V0}0/fool_heal/0 +TEST stat $B0/${V0}1/fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/0 +TEST stat $B0/${V0}1/v1_fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/1 +TEST stat $B0/${V0}1/v1_fool_heal/1 + +TEST stat $B0/${V0}0/v1_dirty_heal/0 +TEST stat $B0/${V0}1/v1_dirty_heal/0 + +#Check if files that have gfid-mismatches in spb are giving EIO +TEST ! stat spb/0 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}0/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}0/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}0/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}0/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}0/source_self_accusing/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + +#Test if the files created as part of full self-heal correctly +r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo +TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/block) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/char) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char + +r=$(get_file_type $B0/${V0}0/source_creations_heal/file) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file +TEST [ -f $B0/${V0}0/source_creations_heal/file ] + +r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink +TEST [ -h $B0/${V0}0/source_creations_heal/slink ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] + +cd - + +#Anonymous directory shouldn't be created +TEST mkdir $M0/rename-dir +before_rename=$(STAT_INO $B0/${V0}1/rename-dir) +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/rename-dir $M0/new-name +TEST $CLI volume start $V0 force +#'spb' is in split-brain so pending-heal-count will be 2 +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 +after_rename=$(STAT_INO $B0/${V0}1/new-name) +EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) +EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) +EXPECT_NOT "$before_rename" echo $after_rename +cleanup diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t new file mode 100644 index 00000000000..256ee2aafce --- /dev/null +++ b/tests/basic/afr/rename-data-loss.t @@ -0,0 +1,72 @@ +#!/bin/bash +#Self-heal tests +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +cd $M0 +TEST `echo "line1" >> file1` +TEST mkdir dir1 +TEST mkdir dir2 +TEST mkdir -p dir1/dira/dirb +TEST `echo "line1">>dir1/dira/dirb/file1` +TEST mkdir delete_me +TEST `echo "line1" >> delete_me/file1` + +#brick0 has witnessed the second write while brick1 is down. +TEST kill_brick $V0 $H0 $B0/brick1 +TEST `echo "line2" >> file1` +TEST `echo "line2" >> dir1/dira/dirb/file1` +TEST `echo "line2" >> delete_me/file1` + +#Toggle the bricks that are up/down. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/brick0 + +#Rename when the 'source' brick0 for data-selfheals is down. +mv file1 file2 +mv dir1/dira dir2 + +#Delete a dir when brick0 is down. +rm -rf delete_me +cd - + +#Bring everything up and trigger heal +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 + +#Remount to avoid reading from caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "line2" tail -1 $M0/file2 +EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 +TEST ! stat $M0/delete_me/file1 +TEST ! stat $M0/delete_me + +anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/brick0/$anon_inode_name ]] +TEST [[ -d $B0/brick1/$anon_inode_name ]] +cleanup diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t new file mode 100644 index 00000000000..7c249c4bcbd --- /dev/null +++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t @@ -0,0 +1,124 @@ +#!/bin/bash + +#Test the client side split-brain resolution +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +GET_MDATA_PATH=$(dirname $0)/../../utils +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + +TEST glusterd +TEST pidof glusterd + +count_files () { + ls $1 | wc -l +} + +#Create replica 2 volume +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.quorum-type fixed +TEST $CLI volume set $V0 cluster.quorum-count 1 +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on + +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST mkdir $M0/data +TEST touch $M0/data/file + + +############ Client side healing using favorite-child-policy = mtime ################# +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +mtime1=$(get_mtime $B0/${V0}0/data/file) +mtime2=$(get_mtime $B0/${V0}1/data/file) +if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then + LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +else + LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +fi + +#file will be in split-brain +cat $M0/data/file > /dev/null +EXPECT "1" echo $? + +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 +cat $M0/data/file > /dev/null +EXPECT "0" echo $? +M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1) +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ] + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ] +TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ] + +############ Client side directory conservative merge ################# +TEST $CLI volume reset $V0 cluster.favorite-child-policy +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST touch $M0/data/test +files=$(count_files $M0/data) +EXPECT "2" echo $files +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST touch $M0/data/test1 +files=$(count_files $M0/data) +EXPECT "2" echo $files + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#data dir will be in entry split-brain +ls $M0/data > /dev/null +EXPECT "2" echo $? + +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 + + +ls $M0/data > /dev/null +EXPECT "0" echo $? + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +#Entry Split-brain is gone, but data self-heal is pending on the files +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 + +cat $M0/data/test > /dev/null +cat $M0/data/test1 > /dev/null + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +files=$(count_files $M0/data) +EXPECT "3" echo $files + +TEST force_umount $M0 +TEST rm $GET_MDATA_PATH/get-mdata-xattr + +cleanup diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t new file mode 100644 index 00000000000..6ec3e1fcbfa --- /dev/null +++ b/tests/basic/fuse/active-io-graph-switch.t @@ -0,0 +1,65 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +TESTS_EXPECTED_IN_LOOP=12 + +function perform_io_on_mount { + local m="$1" + local f="$2" + local lockfile="$3" + while [ -f "$m/$lockfile" ]; + do + dd if=/dev/zero of=$m/$f bs=1M count=1 + done +} + +function perform_graph_switch { + for i in {1..3} + do + TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off + sleep 3 + TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on + sleep 3 + done +} + +function count_files { + ls $M0 | wc -l +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 flush-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST touch $M0/lock +for i in {1..100}; do perform_io_on_mount $M0 $i lock & done +EXPECT_WITHIN 5 "101" count_files + +perform_graph_switch +TEST rm -f $M0/lock +wait +EXPECT "100" count_files +TEST rm -f $M0/{1..100} +EXPECT "0" count_files + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +#Repeat the tests with reader-thread-count +TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST touch $M0/lock +for i in {1..100}; do perform_io_on_mount $M0 $i lock & done +EXPECT_WITHIN 5 "101" count_files + +perform_graph_switch +TEST rm -f $M0/lock +wait +EXPECT "100" count_files +TEST rm -f $M0/{1..100} +EXPECT "0" count_files + +cleanup diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c new file mode 100644 index 00000000000..7beb8dd1fe4 --- /dev/null +++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c @@ -0,0 +1,127 @@ +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> + +#define LOG_ERR(msg) \ + do { \ + fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ + } while (0) + +glfs_t * +init_glfs(const char *hostname, const char *volname, const char *volfile, + const char *logfile) +{ + int ret = -1; + glfs_t *fs = NULL; + + fs = glfs_new(volname); + if (!fs) { + LOG_ERR("glfs_new failed"); + return NULL; + } + + ret = glfs_set_volfile(fs, volfile); + if (ret < 0) { + LOG_ERR("glfs_set_volfile failed"); + goto out; + } + + ret = glfs_set_logging(fs, logfile, 7); + if (ret < 0) { + LOG_ERR("glfs_set_logging failed"); + goto out; + } + + ret = glfs_init(fs); + if (ret < 0) { + LOG_ERR("glfs_init failed"); + goto out; + } + + ret = 0; +out: + if (ret) { + glfs_fini(fs); + fs = NULL; + } + + return fs; +} + +int +glfs_test_function(const char *hostname, const char *volname, + const char *volfile, const char *logfile) +{ + int ret = -1; + int flags = O_CREAT | O_RDWR; + glfs_t *fs = NULL; + glfs_fd_t *glfd = NULL; + const char *buff = "This is from my prog\n"; + const char *filename = "glfs_test.txt"; + + fs = init_glfs(hostname, volname, volfile, logfile); + if (fs == NULL) { + LOG_ERR("init_glfs failed"); + return -1; + } + + glfd = glfs_creat(fs, filename, flags, 0644); + if (glfd == NULL) { + LOG_ERR("glfs_creat failed"); + goto out; + } + + ret = glfs_write(glfd, buff, strlen(buff), flags); + if (ret < 0) { + LOG_ERR("glfs_write failed"); + goto out; + } + + ret = glfs_close(glfd); + if (ret < 0) { + LOG_ERR("glfs_write failed"); + goto out; + } + +out: + ret = glfs_fini(fs); + if (ret) { + LOG_ERR("glfs_fini failed"); + } + + return ret; +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + char *hostname = NULL; + char *volname = NULL; + char *volfile = NULL; + char *logfile = NULL; + + if (argc != 5) { + fprintf(stderr, "Invalid argument\n"); + exit(1); + } + + hostname = argv[1]; + volname = argv[2]; + volfile = argv[3]; + logfile = argv[4]; + + ret = glfs_test_function(hostname, volname, volfile, logfile); + if (ret) { + LOG_ERR("glfs_test_function failed"); + } + + return ret; +} diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t new file mode 100755 index 00000000000..8e94df9d321 --- /dev/null +++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t @@ -0,0 +1,76 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../ssl.rc + +cleanup; + +sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \ + -e "s,@@SSL@@,off,g" \ + $(dirname ${0})/protocol-client-ssl.vol.in \ + > $(dirname ${0})/protocol-client-ssl.vol + +TEST create_self_signed_certs + +TEST glusterd + +TEST $CLI volume create $V0 $H0:$B0/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +logdir=`gluster --print-logdir` + +TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi + +# Run test without I/O or management encryption +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +# Enable management encryption +touch $GLUSTERD_WORKDIR/secure-access + +killall_gluster + +TEST glusterd +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +# Run test with management encryption (No I/O encryption) +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +# Enable I/O encryption +TEST $CLI volume set $V0 server.ssl on + +killall_gluster + +sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \ + -e "s,@@SSL@@,on,g" \ + $(dirname ${0})/protocol-client-ssl.vol.in \ + > $(dirname ${0})/protocol-client-ssl.vol + +TEST glusterd +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +# Run test without I/O or management encryption +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup; + +# NetBSD build scripts are not up to date therefore this test +# is failing in NetBSD. Therefore skipping the test in NetBSD +# as of now. +#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in new file mode 100644 index 00000000000..cdc0c9d0671 --- /dev/null +++ b/tests/basic/gfapi/protocol-client-ssl.vol.in @@ -0,0 +1,15 @@ +# +# This .vol file expects that there is +# +# 1. GlusterD listening on @@HOSTNAME@@ +# 2. a volume that provides a brick on @@BRICKPATH@@ +# 3. the volume with the brick has been started +# +volume test + type protocol/client + option remote-host @@HOSTNAME@@ + option remote-subvolume @@BRICKPATH@@ + option transport-type socket + option transport.socket.ssl-enabled @@SSL@@ +end-volume + diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c new file mode 100644 index 00000000000..62b532b9ce4 --- /dev/null +++ b/tests/basic/metadisp/fsyncdir.c @@ -0,0 +1,29 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <fcntl.h> + +int +main(int argc, char **argv) +{ + int pfd; + + pfd = open(argv[1], O_RDONLY | O_DIRECTORY); + if (pfd == (-1)) { + perror("open"); + return EXIT_FAILURE; + } + + if (rename(argv[2], argv[3]) == (-1)) { + perror("rename"); + return EXIT_FAILURE; + } + + if (fsync(pfd) == (-1)) { + perror("fsync"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/basic/metadisp/ftruncate.c b/tests/basic/metadisp/ftruncate.c new file mode 100644 index 00000000000..c9185212c31 --- /dev/null +++ b/tests/basic/metadisp/ftruncate.c @@ -0,0 +1,34 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <fcntl.h> + +int +main(int argc, char **argv) +{ + int pfd; + + pfd = open(argv[1], O_RDWR); + if (pfd == (-1)) { + perror("open"); + return EXIT_FAILURE; + } + + if (ftruncate(pfd, 0) == (-1)) { + perror("ftruncate"); + return EXIT_FAILURE; + } + + if (write(pfd, "hello", 5) == (-1)) { + perror("write"); + return EXIT_FAILURE; + } + + if (fsync(pfd) == (-1)) { + perror("fsync"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tests/basic/metadisp/fxattr.c b/tests/basic/metadisp/fxattr.c new file mode 100644 index 00000000000..e552057778a --- /dev/null +++ b/tests/basic/metadisp/fxattr.c @@ -0,0 +1,107 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <fcntl.h> +#include <string.h> +#include <sys/types.h> +#include <sys/xattr.h> + +static char MY_XATTR[] = "user.fxtest"; +static char *PROGRAM; +#define CONSUME(v) \ + do { \ + if (!argc) { \ + fprintf(stderr, "missing argument\n"); \ + return EXIT_FAILURE; \ + } \ + v = argv[0]; \ + ++argv; \ + --argc; \ + } while (0) + +static int +do_get(int argc, char **argv, int fd) +{ + char *value; + int ret; + char buf[1024]; + + CONSUME(value); + + ret = fgetxattr(fd, MY_XATTR, buf, sizeof(buf)); + if (ret == (-1)) { + perror("fgetxattr"); + return EXIT_FAILURE; + } + + if (strncmp(buf, value, ret) != 0) { + fprintf(stderr, "data mismatch\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int +do_set(int argc, char **argv, int fd) +{ + char *value; + int ret; + + CONSUME(value); + + ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0); + if (ret == (-1)) { + perror("fsetxattr"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int +do_remove(int argc, char **argv, int fd) +{ + int ret; + + ret = fremovexattr(fd, MY_XATTR); + if (ret == (-1)) { + perror("femovexattr"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +int +main(int argc, char **argv) +{ + int fd; + char *path; + char *cmd; + + CONSUME(PROGRAM); + CONSUME(path); + CONSUME(cmd); + + fd = open(path, O_RDWR); + if (fd == (-1)) { + perror("open"); + return EXIT_FAILURE; + } + + if (strcmp(cmd, "get") == 0) { + return do_get(argc, argv, fd); + } + + if (strcmp(cmd, "set") == 0) { + return do_set(argc, argv, fd); + } + + if (strcmp(cmd, "remove") == 0) { + return do_remove(argc, argv, fd); + } + + return EXIT_SUCCESS; +} diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c new file mode 100644 index 00000000000..63578bc528f --- /dev/null +++ b/tests/basic/metadisp/gfs-fsetxattr.c @@ -0,0 +1,141 @@ +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int gfapi = 1; + +int +main(int argc, char *argv[]) +{ + glfs_t *fs = NULL; + int ret = 0; + int i = 0; + glfs_fd_t *fd = NULL; + char *topdir = "topdir", *filename = "file1"; + char *buf = NULL; + char *logfile = NULL; + char *hostname = NULL; + char *basename = NULL; + char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL; + struct stat sb = { + 0, + }; + + if (argc != 5) { + fprintf( + stderr, + "Expect following args %s <hostname> <Vol> <log file> <basename>\n", + argv[0]); + return -1; + } + + hostname = argv[1]; + logfile = argv[3]; + basename = argv[4]; + + fs = glfs_new(argv[2]); + if (!fs) { + fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno)); + return -1; + } + + ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_set_logging(fs, logfile, 7); + if (ret < 0) { + fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_init(fs); + if (ret < 0) { + fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = asprintf(&dir1, "%s-dir", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + ret = glfs_mkdir(fs, dir1, 0755); + if (ret < 0) { + fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno)); + return -1; + } + + fd = glfs_opendir(fs, dir1); + if (!fd) { + fprintf(stderr, "/: %s\n", strerror(errno)); + return -1; + } + + ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno)); + return -1; + } + + ret = glfs_closedir(fd); + if (ret < 0) { + fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = asprintf(&filename1, "%s-file", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + ret = asprintf(&filename2, "%s-file-renamed", basename); + if (ret < 0) { + fprintf(stderr, "cannot construct filename (%s)", strerror(errno)); + return ret; + } + + fd = glfs_creat(fs, filename1, O_RDWR, 0644); + if (!fd) { + fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno)); + return -1; + } + + ret = glfs_rename(fs, filename1, filename2); + if (ret < 0) { + fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_lstat(fs, filename2, &sb); + if (ret < 0) { + fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } + + ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno)); + return -1; + } + + ret = glfs_close(fd); + if (ret < 0) { + fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret, + strerror(errno)); + return -1; + } +} diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t new file mode 100644 index 00000000000..894ffe07226 --- /dev/null +++ b/tests/basic/metadisp/metadisp.t @@ -0,0 +1,316 @@ +#!/usr/bin/env bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + + +# Considering `--enable-metadisp` is an option for `./configure`, +# which is disabled by default, this test will never pass regression. +# But to see the value of this test, run below after configuring +# with above option : +# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t` + +#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST + +cleanup; + +TEST mkdir -p $B0/b0/{0,1} + +TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0 +TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1 + +TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0; + +NUM_FILES=40 +TEST touch $M0/{1..${NUM_FILES}} + +# each drive should get 40 files +TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ] +TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ] + +# now write some data to a file +echo "hello" > $M0/3 +filename=$$ +echo "hello" > /tmp/metadisp-write-${filename} +checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}') +TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ] + +# check that the backend file exists on b1 +gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3)) +TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) -eq 1 ] + +# check that the backend file matches the frontend +TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ] + +# delete the file +TEST rm $M0/3 + +# ensure the frontend and backend files are cleaned up +TEST ! -e $M0/3 +TEST ! [ stat $B0/b*/*/$gfid ] + +# Test TRUNCATE + WRITE flow +echo "hello" | tee $M0/4 +echo "goo" | tee $M0/4 +filename=$$ +echo "goo" | tee /tmp/metadisp-truncate-${filename} +checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}') +TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ] + +# Test mkdir + rmdir. +TEST mkdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +TEST rmdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] + +# Test rename. +TEST touch $M0/rename_me +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +TEST mv $M0/rename_me $M0/such_rename +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] + +# Test rename of a file that doesn't exist. +TEST ! mv $M0/does-not-exist $M0/neither-does-this + + +# cleanup all the other files. +TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}} +TEST rm $M0/such_rename +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ] + +# Test CREATE flow +NUM_FILES=40 +TEST touch $M0/{1..${NUM_FILES}} +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ] + +# Test UNLINK flow +# No drives should have any files +TEST rm -v $M0/{1..${NUM_FILES}} +TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ] +TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ] + +# Test CREATE + WRITE + READ flow +filename=$$ +dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10 +checksum=$(md5sum /tmp/${filename} | awk '{print $1}') +TEST cp -v /tmp/${filename} $M0/1 +TEST cp -v /tmp/${filename} $M0/2 +TEST cp -v /tmp/${filename} $M0/3 +TEST cp -v /tmp/${filename} $M0/4 +TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ] +TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ] + +# Test TRUNCATE + WRITE flow +TEST dd if=/dev/zero of=$M0/1 bs=1M count=20 + +# Check that readdir stats the files properly and we get the correct sizes +TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ]; + +# Test mkdir + rmdir. +TEST mkdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +TEST rmdir $M0/rmdir_me +nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] + +# Test rename. +# Still flaky, so disabled until it can be debugged. +TEST touch $M0/rename_me +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +TEST mv $M0/rename_me $M0/such_rename +nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l) +TEST [ "$nfiles" = "0" ] +nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l) +TEST [ "$nfiles" = "1" ] + +# Test rename of a file that doesn't exist. +TEST ! mv $M0/does-not-exist $M0/neither-does-this + +# Test rename over an existing file. +ok=yes +for i in $(seq 0 9); do + echo foo > $M0/src$i + echo bar > $M0/dst$i +done +for i in $(seq 0 9); do + mv $M0/src$i $M0/dst$i +done +for i in $(seq 0 9); do + nfiles=$(cat $B0/b0/*/dst$i | wc -l) + if [ "$nfiles" = "2" ]; then + echo "COLLISION on dst$i" + (ls -l $B0/b0/*/dst$i; cat $B0/b0/*/dst$i) | sed "/^/s// /" + ok=no + fi +done +EXPECT "yes" echo $ok + +# Test rename of a directory. +count_copies () { + ls -d $B0/b?/?/$1 2> /dev/null | wc -l +} +TEST mkdir $M0/foo_dir +EXPECT 1 count_copies foo_dir +EXPECT 0 count_copies bar_dir +TEST mv $M0/foo_dir $M0/bar_dir +EXPECT 0 count_copies foo_dir +EXPECT 1 count_copies bar_dir + +for x in $(seq 0 99); do + touch $M0/target$x + ln -s $M0/target$x $M0/link$x +done +on_0=$(ls $B0/b*/0/link* | wc -l) +on_1=$(ls $B0/b*/1/link* | wc -l) +TEST [ "$on_0" -eq 100 ] +TEST [ "$on_1" -eq 0 ] +TEST [ "$(ls -l $M0/link* | wc -l)" = 100 ] + +# Test (hard) link. +_test_hardlink () { + local b + local has_src + local has_dst + local src_inum + local dst_inum + touch $M0/hardsrc$1 + ln $M0/hardsrc$1 $M0/harddst$1 + for b in $B0/b{0}/{0,1}; do + [ -f $b/hardsrc$1 ]; has_src=$? + [ -f $b/harddst$1 ]; has_dst=$? + if [ "$has_src" != "$has_dst" ]; then + echo "MISSING $b/hardxxx$1 $has_src $has_dst" + return + fi + if [ "$has_src$has_dst" = "00" ]; then + src_inum=$(stat -c '%i' $b/hardsrc$1) + dst_inum=$(stat -c '%i' $b/harddst$1) + if [ "$dst_inum" != "$src_inum" ]; then + echo "MISMATCH $b/hardxx$i $src_inum $dst_inum" + return + fi + fi + done + echo "OK" +} + +test_hardlink () { + local result=$(_test_hardlink $*) + # [ "$result" = "OK" ] || echo $result > /dev/tty + echo $result +} + +# Do this multiple times to make sure colocation isn't a fluke. +EXPECT "OK" test_hardlink 0 +EXPECT "OK" test_hardlink 1 +EXPECT "OK" test_hardlink 2 +EXPECT "OK" test_hardlink 3 +EXPECT "OK" test_hardlink 4 +EXPECT "OK" test_hardlink 5 +EXPECT "OK" test_hardlink 6 +EXPECT "OK" test_hardlink 7 +EXPECT "OK" test_hardlink 8 +EXPECT "OK" test_hardlink 9 + +# Test remove hardlink source. ensure deleting one file +# doesn't delete the data unless link-count is 1 +TEST mkdir $M0/hardlink +TEST touch $M0/hardlink/fileA +echo "data" >> $M0/hardlink/fileA +checksum=$(md5sum $M0/hardlink/fileA | awk '{print $1}') +TEST ln $M0/hardlink/fileA $M0/hardlink/fileB +TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 2 ] +TEST rm $M0/hardlink/fileA +TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 1 ] +TEST [ "$(md5sum $M0/hardlink/fileB | awk '{print $1}')" == "$checksum" ] + +# +# FIXME: statfs values look ok but the test is bad +# +# Test statfs. If we're doing it right, the numbers for the mountpoint should be +# double those for the brick filesystem times the number of bricks, +# but unless we're on a completely idle +# system (which never happens) the numbers can change even while this function +# runs and that would trip us up. Do a sloppy comparison to deal with that. +#compare_fields () { +# val1=$(df $1 | grep / | awk "{print \$$3}") +# val2=$(df $2 | grep / | awk "{print \$$3}") +# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ] +#} + +#brick_df=$(df $B0 | grep /) +#mount_df=$(df $M0 | grep /) +#TEST compare_fields $B0 $M0 2 # Total blocks +#TEST compare_fields $B0 $M0 3 # Used +#TEST compare_fields $B0 $M0 4 # Available + +# Test removexattr. +#RXATTR_FILE=$(get_file_not_on_disk0 rxtest) +#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE +#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE +#TEST setfattr -x user.foo $M0/$RXATTR_FILE +#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE + +# Test fsyncdir. We can't really test whether it's doing the right thing, +# but we can test that it doesn't fail and we can hand-check that it's calling +# down to all of the disks instead of just one. +# +# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if +# other translators are handling it correctly? + +#FSYNCDIR_EXE=$(dirname $0)/fsyncdir +#build_tester ${FSYNCDIR_EXE}.c +#TEST touch $M0/fsyncdir_src +#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst +#TEST rm -f $FSYNCDIR_EXE + +# Test fsetxattr, fgetxattr, fremovexattr (in that order). +FXATTR_FILE=$M0/fxfile1 +TEST touch $FXATTR_FILE +FXATTR_EXE=$(dirname $0)/fxattr +build_tester ${FXATTR_EXE}.c +TEST ! getfattr -n user.fxtest $FXATTR_FILE +TEST $FXATTR_EXE $FXATTR_FILE set value1 +TEST getfattr -n user.fxtest $FXATTR_FILE +TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE +TEST $FXATTR_EXE $FXATTR_FILE get value2 +TEST $FXATTR_EXE $FXATTR_FILE remove +TEST ! getfattr -n user.fxtest $FXATTR_FILE +TEST rm -f $FXATTR_EXE + +# Test ftruncate +FTRUNCATE_EXE=$(dirname $0)/ftruncate +build_tester ${FTRUNCATE_EXE}.c +FTRUNCATE_FILE=$M0/ftfile1 +TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB +TEST $FTRUNCATE_EXE $FTRUNCATE_FILE +#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1)) + +# Test fallocate, discard, zerofill. Actually we don't so much check that these +# *work* as that they don't throw any errors (especially ENOENT because the +# file's not on disk zero). +FALLOC_FILE=fatest1 +TEST touch $M0/$FALLOC_FILE +TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE +TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE +# This actually fails with "operation not supported" on most filesystems, so +# don't leave it enabled except to test changes. +#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE + +#cleanup; diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol new file mode 100644 index 00000000000..58ae2f6f2a8 --- /dev/null +++ b/tests/basic/metadisp/metadisp.vol @@ -0,0 +1,14 @@ +volume posix-0 + type storage/posix + option directory /d/backends/b0/0 +end-volume + +volume posix-1 + type storage/posix + option directory /d/backends/b0/1 +end-volume + +volume metadisp-0 + type features/metadisp + subvolumes posix-0 posix-1 +end-volume diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t index d959f750ee0..a51b1bc7267 100755 --- a/tests/bugs/fuse/many-groups-for-acl.t +++ b/tests/bugs/fuse/many-groups-for-acl.t @@ -38,6 +38,13 @@ do done TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} +# Linux < 3.8 exports only first 32 gids of pid to userspace +kernel_exports_few_gids=0 +if [ "$OSTYPE" = Linux ] && \ + su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then + kernel_exports_few_gids=1 +fi + # preparation done, start the tests TEST glusterd @@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off TEST $CLI volume set ${V0} server.manage-gids off TEST $CLI volume start ${V0} +# This is just a synchronization hack to make sure the bricks are +# up before going on. EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available # mount the volume with POSIX ACL support, without --resolve-gids @@ -69,8 +78,8 @@ TEST [ $? -eq 0 ] su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null" TEST [ $? -eq 0 ] -su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null" -TEST [ $? -ne 0 ] +su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null" +TEST [ $? -eq $kernel_exports_few_gids ] su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null" TEST [ $? -ne 0 ] diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t index 29f0ed1fe4e..0be31dac768 100644 --- a/tests/bugs/glusterd/brick-order-check-add-brick.t +++ b/tests/bugs/glusterd/brick-order-check-add-brick.t @@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' +TEST $CLI_1 volume stop $V0 +TEST $CLI_1 volume delete $V0 + +TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1 +EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks' +EXPECT 'Created' volinfo_field $V0 'Status' + +TEST $CLI_1 volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +#Add-brick with Increasing replica count +TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1 +EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' + +#Add-brick with Increasing replica count from same host should fail +TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3 + +#adding multiple bricks from same host should fail the brick order check +TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9} +EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' + cleanup diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t index c208112c8b0..011535066f9 100644 --- a/tests/bugs/replicate/bug-1744548-heal-timeout.t +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0 TEST $CLI volume profile $V0 start TEST $CLI volume profile $V0 info clear TEST $CLI volume heal $V0 enable -# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes -EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count +# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode +EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count # Check that a change in heal-timeout is honoured immediately. TEST $CLI volume set $V0 cluster.heal-timeout 5 sleep 10 # Two crawls must have happened. -EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count # shd must not heal if it is disabled and heal-timeout is changed. TEST $CLI volume heal $V0 disable diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t new file mode 100644 index 00000000000..bbe82c0e5b2 --- /dev/null +++ b/tests/bugs/shard/issue-1425.t @@ -0,0 +1,45 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +FILE_COUNT_TIME=5 + +function get_file_count { + ls $1* | wc -l +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}0 +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume start $V0 +TEST $CLI volume profile $V0 start + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + +TEST fallocate -l 20M $M0/foo +gfid_new=$(get_gfid_string $M0/foo) + +# Check for the base shard +TEST stat $M0/foo +TEST stat $B0/${V0}0/foo + +# There should be 4 associated shards +EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new + +# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch +EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'` + +# Delete the base shard and check shards get cleaned up +TEST unlink $M0/foo + +TEST ! stat $M0/foo +TEST ! stat $B0/${V0}0/foo + +# There should be no shards now +EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new +cleanup diff --git a/tests/features/trash.t b/tests/features/trash.t index 472e909e567..da5b50bc85a 100755 --- a/tests/features/trash.t +++ b/tests/features/trash.t @@ -94,105 +94,105 @@ wildcard_not_exists() { if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi } -# testing glusterd [1-3] +# testing glusterd TEST glusterd TEST pidof glusterd TEST $CLI volume info -# creating distributed volume [4] +# creating distributed volume TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} -# checking volume status [5-7] +# checking volume status EXPECT "$V0" volinfo_field $V0 'Volume Name' EXPECT 'Created' volinfo_field $V0 'Status' EXPECT '2' brick_count $V0 -# test without enabling trash translator [8] +# test without enabling trash translator TEST start_vol $V0 $M0 -# test on enabling trash translator [9-10] +# test on enabling trash translator TEST $CLI volume set $V0 features.trash on EXPECT 'on' volinfo_field $V0 'features.trash' -# files directly under mount point [11] +# files directly under mount point create_files $M0/file1 $M0/file2 TEST file_exists $V0 file1 file2 -# perform unlink [12] +# perform unlink TEST unlink_op file1 -# perform truncate [13] +# perform truncate TEST truncate_op file2 4 -# create files directory hierarchy and check [14] +# create files directory hierarchy and check mkdir -p $M0/1/2/3 create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 -# perform unlink [15] +# perform unlink TEST unlink_op 1/2/3/foo1 -# perform truncate [16] +# perform truncate TEST truncate_op 1/2/3/foo2 4 # create a directory for eliminate pattern mkdir $M0/a -# set the eliminate pattern [17-18] +# set the eliminate pattern TEST $CLI volume set $V0 features.trash-eliminate-path /a EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' -# create two files and check [19] +# create two files and check create_files $M0/a/test1 $M0/a/test2 TEST file_exists $V0 a/test1 a/test2 -# remove from eliminate pattern [20] +# remove from eliminate pattern rm -f $M0/a/test1 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* -# truncate from eliminate path [21-23] +# truncate from eliminate path truncate -s 2 $M0/a/test2 TEST [ -e $M0/a/test2 ] TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* -# set internal op on [24-25] +# set internal op on TEST $CLI volume set $V0 features.trash-internal-op on EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' -# again create two files and check [26] +# again create two files and check create_files $M0/inop1 $M0/inop2 TEST file_exists $V0 inop1 inop2 -# perform unlink [27] +# perform unlink TEST unlink_op inop1 -# perform truncate [28] +# perform truncate TEST truncate_op inop2 4 -# remove one brick and restart the volume [28-31] +# remove one brick and restart the volume TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 TEST start_vol $V0 $M0 $M0/.trashcan -# again create two files and check [33] +# again create two files and check create_files $M0/rebal1 $M0/rebal2 TEST file_exists $V0 rebal1 rebal2 -# add one brick [34-35] +# add one brick TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 TEST [ -d $B0/${V0}3 ] -# perform rebalance [36] +# perform rebalance TEST $CLI volume rebalance $V0 start force EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed #Find out which file was migrated to the new brick file_name=$(ls $B0/${V0}3/rebal*| xargs basename) -# check whether rebalance was succesful [37-40] +# check whether rebalance was succesful EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* @@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 # force required in case rebalance is not over TEST $CLI volume stop $V0 force -# create a replicated volume [41] +# create a replicated volume TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2} -# checking volume status [42-45] +# checking volume status EXPECT "$V1" volinfo_field $V1 'Volume Name' EXPECT 'Replicate' volinfo_field $V1 'Type' EXPECT 'Created' volinfo_field $V1 'Status' EXPECT '2' brick_count $V1 -# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] +# enable trash with options and start the replicate volume by disabling automatic self-heal TEST $CLI volume set $V1 features.trash on TEST $CLI volume set $V1 features.trash-internal-op on EXPECT 'on' volinfo_field $V1 'features.trash' EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' TEST start_vol $V1 $M1 $M1/.trashcan -# mount and check for trash directory [51] +# mount and check for trash directory TEST [ -d $M1/.trashcan/internal_op ] -# create a file and check [52] +# create a file and check touch $M1/self TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] -# kill one brick and delete the file from mount point [53-54] +# kill one brick and delete the file from mount point kill_brick $V1 $H0 $B0/${V1}1 EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count rm -f $M1/self EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* -# force start the volume and trigger the self-heal manually [55-57] -TEST $CLI volume start $V1 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -# Since we created the file under root of the volume, it will be -# healed automatically - -# check for the removed file in trashcan [58] -EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* - -# check renaming of trash directory through cli [59-62] +# check renaming of trash directory through cli TEST $CLI volume set $V0 trash-dir abc TEST start_vol $V0 $M0 $M0/abc TEST [ -e $M0/abc -a ! -e $M0/.trashcan ] EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* -# ensure that rename and delete operation on trash directory fails [63-65] +# ensure that rename and delete operation on trash directory fails rm -rf $M0/abc/internal_op TEST [ -e $M0/abc/internal_op ] rm -rf $M0/abc/ diff --git a/tests/volume.rc b/tests/volume.rc index 3924baeb7fc..b38848c0e52 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -999,4 +999,4 @@ function logging_time_check() local logfile=`echo ${0##*/}`_glusterd1.log cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l -}
\ No newline at end of file +} diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 8dbdb572abd..032ab5c8001 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) @@ -1197,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this, return 0; } -int +static int afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, int *spb_choice) { int ret = -1; - GF_VALIDATE_OR_GOTO(this->name, inode, out); LOCK(&inode->lock); @@ -1214,6 +1248,40 @@ out: return ret; } +/* + * frame is used to get the favourite policy. Since + * afr_inode_split_brain_choice_get was called with afr_open, it is possible to + * have a frame with out local->replies. So in that case, frame is passed as + * null, hence this function will handle the frame NULL case. + */ +int +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol) +{ + int ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("afr", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out); + + priv = this->private; + + ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol); + if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) { + local = frame->local; + *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode, + NULL); + if (*spb_subvol >= 0) { + ret = 0; + } + } + +out: + return ret; +} int afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int event) @@ -2238,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv, * need is a low probability that multiple clients * won't converge on the same subvolume. */ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -2823,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = -1; + int spb_subvol = -1; int child_count = -1; if (*read_subvol != -1) @@ -2833,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, local = frame->local; child_count = priv->child_count; - afr_inode_split_brain_choice_get(local->inode, this, &spb_choice); - if ((spb_choice >= 0) && + afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol); + if ((spb_subvol >= 0) && (AFR_COUNT(success_replies, child_count) == child_count)) { - *read_subvol = spb_choice; + *read_subvol = spb_subvol; } else if (!priv->quorum_count || frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { *read_subvol = afr_first_up_child(frame, this); @@ -3635,7 +3704,7 @@ afr_ta_id_file_check(void *opaque) this = opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -3945,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5627,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6620,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f69013f3e0a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index a5b004f4258..64856042b65 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = 0; + int spb_subvol = 0; int event_generation = 0; int ret = 0; int32_t op_errno = 0; @@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ret = afr_inode_get_readable(frame, local->inode, this, NULL, &event_generation, AFR_DATA_TRANSACTION); if ((ret < 0) && - (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) == - 0) && - spb_choice < 0) { + (afr_split_brain_read_subvol_get(local->inode, this, NULL, + &spb_subvol) == 0) && + spb_subvol < 0) { afr_inode_refresh(frame, this, local->inode, local->inode->gfid, afr_open_continue); } else { diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 772b59f9a2f..6fc2c75145c 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque) xdata_rsp = NULL; /* It doesn't. So query thin-arbiter to see if it blames any data brick. */ - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) int read_subvol = -1; inode_t *inode = NULL; int ret = -1; - int spb_choice = -1; + int spb_subvol = -1; local = frame->local; inode = local->inode; @@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) local->read_attempted[read_subvol] = 1; readfn: if (read_subvol == -1) { - ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice); - if ((ret == 0) && spb_choice >= 0) - read_subvol = spb_choice; + ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol); + if ((ret == 0) && spb_subvol >= 0) + read_subvol = spb_subvol; } if (read_subvol == -1) { diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f35c41df274..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dd40c57ab12..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7a038fa7fe3..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -369,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 2219a53b277..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer) priv = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + priv->shd.timeout; + wait_till.tv_sec = gf_time() + priv->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) event->split_brain_count = 0; event->heal_failed_count = 0; - time(&event->start_time); + event->start_time = gf_time(); event->end_time = 0; _mask_cancellation(); } @@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer) event = &healer->crawl_event; shd = &(((afr_private_t *)healer->this->private)->shd); - time(&event->end_time); + event->end_time = gf_time(); history = gf_memdup(event, sizeof(*event)); event->start_time = 0; @@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 67c3e0699e6..a51f79b1f43 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque) this = (xlator_t *)opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } @@ -1029,7 +1029,7 @@ set_response: } int -afr_fill_ta_loc(xlator_t *this, loc_t *loc) +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop) { afr_private_t *priv = NULL; @@ -1037,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) loc->parent = inode_ref(priv->root_inode); gf_uuid_copy(loc->pargfid, loc->parent->gfid); loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; + if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) { + /* Except afr_ta_id_file_check() which is path based, all other gluster + * FOPS need gfid.*/ + return -EINVAL; + } gf_uuid_copy(loc->gfid, priv->ta_gfid); loc->inode = inode_new(loc->parent->table); if (!loc->inode) { @@ -1046,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) return 0; } -int -afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local) -{ - int ret = 0; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int failed_count = 0; - struct gf_flock flock = { - 0, - }; - loc_t loc = { - 0, - }; - int i = 0; - - priv = this->private; - if (!priv->thin_arbiter_count) - return 0; - - failed_count = AFR_COUNT(local->transaction.failed_subvols, - priv->child_count); - if (!failed_count) - return 0; - - GF_ASSERT(failed_count == 1); - ret = afr_fill_ta_loc(this, &loc); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", loc.name); - goto out; - } - - xattr = dict_new(); - if (!xattr) { - ret = -ENOMEM; - goto out; - } - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_static_bin(xattr, priv->pending_key[i], - local->pending[i], - AFR_NUM_CHANGE_LOGS * sizeof(int)); - if (ret) - goto out; - } - - flock.l_type = F_WRLCK; - flock.l_start = 0; - flock.l_len = 0; - - /*TODO: Convert to two domain locking. */ - ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL); - if (ret) - goto out; - - ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); - - if (ret == -EINVAL) { - gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB, - "Thin-arbiter has denied post-op on %s for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - - } else if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Post-op on thin-arbiter id file %s failed for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - } - flock.l_type = F_UNLCK; - syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY, - &loc, F_SETLK, &flock, NULL, NULL); -out: - if (xattr) - dict_unref(xattr); - - return ret; -} - static int afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque) { @@ -1220,9 +1145,9 @@ afr_ta_post_op_do(void *opaque) this = local->transaction.frame->this; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 67e0a4d10be..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -541,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -553,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -561,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1286,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 1fff5640940..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t *child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -1271,8 +1278,8 @@ int afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice); int -afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, - int *spb_choice); +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol); int afr_get_child_index_from_name(xlator_t *this, char *name); @@ -1357,7 +1364,7 @@ int afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode); int -afr_fill_ta_loc(xlator_t *this, loc_t *loc); +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop); int afr_ta_post_op_lock(xlator_t *this, loc_t *loc); @@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame); void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index be92236e3bd..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -382,7 +382,7 @@ out: /* Code to save hashed subvol on inode ctx as a mds subvol */ -static int +int dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol) { dht_inode_ctx_t *ctx = NULL; @@ -2161,31 +2161,18 @@ static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } static int32_t @@ -4314,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy = gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4604,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -5893,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -6720,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -10850,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...) int had_heard_from_all = 0; int have_heard_from_all = 0; - struct timeval time = { - 0, - }; gf_defrag_info_t *defrag = NULL; dict_t *dict = NULL; gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -10896,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...) break; } - gettimeofday(&time, NULL); LOCK(&conf->subvolume_lock); { conf->subvolume_status[cnt] = 1; conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = time.tv_sec; + conf->subvol_up_time[cnt] = gf_time(); } UNLOCK(&conf->subvolume_lock); @@ -11090,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. */ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -11243,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. It is valid only for for ops on files @@ -11299,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { @@ -11465,117 +11389,3 @@ dht_dir_layout_error_check(xlator_t *this, inode_t *inode) /* Returning the first xlator error as all xlators have errors */ return layout->list[0].err; } - -/* Get brick paths from all the local subvols and store for use. - * - * TODO: Make sure newly added brick is not picked for migration. - * Otherwise there will be no rebalance as directory entries won't be present - * on a newly added brick */ -int -dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc) -{ - dict_t *dict = NULL; - gf_defrag_info_t *defrag = conf->defrag; - char *key = NULL; - char *tmp = NULL; - char *str = NULL; - char *token; - char *saveptr = NULL; - int i = 1; - int j = 0; - int ret = 0; - - key = gf_strdup("glusterfs.pathinfo"); - if (!key) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, - "failed to allocate " - "memory"); - ret = -1; - goto out; - } - - defrag->local_brick_paths = GF_CALLOC(conf->local_subvols_cnt, - sizeof(*defrag->local_brick_paths), - gf_common_mt_pointer); - - for (j = 0; j < conf->local_subvols_cnt; j++) { - ret = syncop_getxattr(conf->local_subvols[j], loc, &dict, key, NULL, - NULL); - if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, 0, 0, - "failed to get path," - " errno %d", - ret); - /* TODO: We need not break out from here and can resume operation. - * We need a place holder in gf_defrag_info_t to mark which - * local_brick_paths we are working on. Right now, we blindly - * take defrag->local_brick_path[0]. This can be dynamic based on - * need */ - goto out; - } - - str = NULL; - ret = dict_get_str(dict, key, &str); - if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "dict get failed for :%s", - key); - goto out; - } - if (str == NULL) { - gf_msg(this->name, GF_LOG_ERROR, 0, 0, "key:%s not found", key); - ret = -1; - goto out; - } - - if (!defrag->is_pure_distribute) { - tmp = strstr(str, "REPLICATE"); - if (tmp) { - defrag->is_pure_distribute = _gf_false; - break; - } - - /*TODO: fetching glusterfs.pathinfo on erasure volume is failing. - *Function the old way till we get it resolved */ - tmp = strstr(str, "ERASURE"); - if (tmp) { - defrag->is_pure_distribute = _gf_false; - break; - } - - defrag->is_pure_distribute = _gf_true; - } - - saveptr = NULL; - - for (token = strtok_r(str, ":", &saveptr), i = 1; token;) { - token = strtok_r(NULL, ":", &saveptr); - i++; - if (i == 3) { - token = strtok_r(token, ">", &saveptr); - break; - } else { - continue; - } - } - - defrag->local_brick_paths[j] = gf_strdup(token); - } - -out: - if (ret == -1) { - gf_msg(this->name, GF_LOG_INFO, 0, 0, - "failed to get brick path. " - "Will operate old way"); - for (j = 0; j < conf->local_subvols_cnt; j++) { - GF_FREE(defrag->local_brick_paths[j]); - } - defrag->is_pure_distribute = _gf_false; - } - - if (defrag->is_pure_distribute) { - gf_msg(this->name, GF_LOG_INFO, 0, 0, "volume type : pure distribute"); - } - - GF_FREE(key); - return ret; -} diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 84891406c71..fe0dc3db34a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,7 +24,6 @@ #define _DHT_H #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" -#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" #define DHT_MDS_STR "mds" #define GF_DHT_LOOKUP_UNHASHED_OFF 0 @@ -36,7 +35,6 @@ #define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal" /* Namespace synchronization */ #define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync" -#define TIERING_MIGRATION_KEY "tiering.migration" #define DHT_LAYOUT_HASH_INVALID 1 #define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN) @@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem, dht_layout_t **ondisk); -typedef struct { - uint64_t blocks_used; - uint64_t pblocks_used; - uint64_t files_used; - uint64_t pfiles_used; - uint64_t unhashed_blocks_used; - uint64_t unhashed_pblocks_used; - uint64_t unhashed_files_used; - uint64_t unhashed_pfiles_used; - uint64_t unhashed_fsid; - uint64_t hashed_fsid; -} tier_statvfs_t; - struct dht_local { loc_t loc; loc_t loc2; @@ -272,7 +257,6 @@ struct dht_local { struct iatt preparent; struct iatt postparent; struct statvfs statvfs; - tier_statvfs_t tier_statvfs; fd_t *fd; inode_t *inode; dict_t *params; @@ -405,14 +389,7 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS = 1 + 2, GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, GF_DEFRAG_CMD_START_FORCE = 1 + 4, - GF_DEFRAG_CMD_START_TIER = 1 + 5, - GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, - GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, - GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, - GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11, - GF_DEFRAG_CMD_STOP_TIER = 1 + 12, GF_DEFRAG_CMD_DETACH_START = 1 + 13, GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14, GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15, @@ -463,75 +440,6 @@ struct dht_container { int local_subvol_index; }; -typedef enum tier_mode_ { - TIER_MODE_NONE = 0, - TIER_MODE_TEST, - TIER_MODE_WM -} tier_mode_t; - -typedef enum tier_pause_state_ { - TIER_RUNNING = 0, - TIER_REQUEST_PAUSE, - TIER_PAUSED -} tier_pause_state_t; - -/* This Structure is only used in tiering fixlayout */ -typedef struct gf_tier_fix_layout_arg { - xlator_t *this; - dict_t *fix_layout; - pthread_t thread_id; -} gf_tier_fix_layout_arg_t; - -typedef struct gf_tier_conf { - int is_tier; - int watermark_hi; - int watermark_low; - int watermark_last; - unsigned long block_size; - fsblkcnt_t blocks_total; - fsblkcnt_t blocks_used; - uint64_t max_migrate_bytes; - int max_migrate_files; - int query_limit; - tier_mode_t mode; - int percent_full; - /* These flags are only used for tier-compact */ - gf_boolean_t compact_active; - /* These 3 flags are set to true when the client changes the */ - /* compaction mode on the command line. */ - /* When they are set, the daemon will trigger compaction as */ - /* soon as possible to activate or deactivate compaction. */ - /* If in the middle of a compaction, then the switches take */ - /* effect on the next compaction, not the current one. */ - /* If the user switches it off, we want to avoid needless */ - /* compactions. */ - /* If the user switches it on, they want to compact as soon */ - /* as possible. */ - gf_boolean_t compact_mode_switched; - gf_boolean_t compact_mode_switched_hot; - gf_boolean_t compact_mode_switched_cold; - int tier_max_promote_size; - int tier_promote_frequency; - int tier_demote_frequency; - int tier_compact_hot_frequency; - int tier_compact_cold_frequency; - uint64_t st_last_promoted_size; - uint64_t st_last_demoted_size; - struct synctask *pause_synctask; - gf_timer_t *pause_timer; - pthread_mutex_t pause_mutex; - int promote_in_progress; - int demote_in_progress; - /* This Structure is only used in tiering fixlayout */ - gf_tier_fix_layout_arg_t tier_fix_layout_arg; - /* Indicates the index of the first queryfile picked - * in the last cycle of promote or demote */ - int32_t last_promote_qfile_index; - int32_t last_demote_qfile_index; - tier_pause_state_t pause_state; - char volname[GD_VOLUME_NAME_MAX + 1]; -} gf_tier_conf_t; - typedef struct nodeuuid_info { char info; /* Set to 1 is this is my node's uuid*/ uuid_t uuid; /* Store the nodeuuid as well for debugging*/ @@ -559,17 +467,10 @@ struct gf_defrag_info_ { int cmd; inode_t *root_inode; uuid_t node_uuid; - struct timeval start_time; + time_t start_time; uint32_t new_commit_hash; gf_defrag_status_t defrag_status; gf_defrag_pattern_list_t *defrag_pattern; - gf_tier_conf_t tier_conf; - - /*Data Tiering params for scanner*/ - uint64_t total_files_promoted; - uint64_t total_files_demoted; - int write_freq_threshold; - int read_freq_threshold; pthread_cond_t parallel_migration_cond; pthread_mutex_t dfq_mutex; @@ -598,15 +499,6 @@ struct gf_defrag_info_ { gf_boolean_t stats; /* lock migration flag */ gf_boolean_t lock_migration_enabled; - - /* local system crawl */ - char **local_brick_paths; - - /* whether the volume is pure distribute */ - gf_boolean_t is_pure_distribute; - - /*TODO: Introduce a glusterd option to tune this behaviour*/ - gf_boolean_t operate_dist; }; typedef struct gf_defrag_info_ gf_defrag_info_t; @@ -614,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t; struct dht_methods_s { int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local); int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag); - int32_t (*migration_needed)(xlator_t *this); xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout, const char *name); }; @@ -635,7 +526,7 @@ struct dht_conf { int subvolume_cnt; int32_t refresh_interval; gf_lock_t subvolume_lock; - struct timeval last_stat_fetch; + time_t last_stat_fetch; gf_lock_t layout_lock; dict_t *leaf_to_subvol; void *private; /* Can be used by wrapper xlators over @@ -1325,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout); int dht_refresh_layout(call_frame_t *frame); -gf_boolean_t -dht_is_tier_xlator(xlator_t *this); - int dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); @@ -1492,5 +1380,5 @@ int dht_dir_layout_error_check(xlator_t *this, inode_t *inode); int -dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc); +dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol); #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 27097ca2475..c0588828fdb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) dht_conf_t *conf = NULL; call_frame_t *statfs_frame = NULL; dht_local_t *statfs_local = NULL; - struct timeval tv = { - 0, - }; loc_t tmp_loc = { 0, }; + time_t now; conf = this->private; - - gettimeofday(&tv, NULL); - + now = gf_time(); /* make it root gfid, should be enough to get the proper info back */ tmp_loc.gfid[15] = 1; - if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) { + if (now > (conf->refresh_interval + conf->last_stat_fetch)) { statfs_frame = copy_frame(frame); if (!statfs_frame) { goto err; @@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) statfs_local->params); } - conf->last_stat_fetch.tv_sec = tv.tv_sec; + conf->last_stat_fetch = now; } return 0; err: diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index eda2491e0ff..2f23ce90fbd 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1(postbuf)) { - if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new(); - if (!local->xattr_req) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32(local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "insufficient memory"); local->op_errno = ENOMEM; local->op_ret = -1; goto out; } } + ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + dht_iatt_merge(this, &local->stbuf, postbuf); dht_iatt_merge(this, &local->prebuf, prebuf); diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 33f9832395b..e3c4471334a 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -30,10 +30,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, - gf_tier_mt_bricklist_t, - gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, - gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, gf_dht_nodeuuids_t, gf_dht_mt_end diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 026879e14af..601f8dad78b 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -38,12 +38,11 @@ GLFS_MSGID( DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED, DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES, DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED, - DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR, - DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED, - DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED, - DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED, - DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO, - DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, + DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, + DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED, + DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED, + DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, + DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED, DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED, DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED, @@ -69,8 +68,7 @@ GLFS_MSGID( DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED, DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR, DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO, - DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR, - DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED, + DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED, DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED, DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED, DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED, @@ -96,15 +94,13 @@ GLFS_MSGID( DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED, DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL, DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED, - DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, - DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP, - DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED, - DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE, - DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED, - DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, - DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE, - DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, - DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR, + DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP, + DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, + DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED, + DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED, + DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, + DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED, + DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR, DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR, DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED, DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED, @@ -180,7 +176,6 @@ GLFS_MSGID( "adding bricks" #define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file" #define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory" -#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused" #define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr" #define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode" #define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination" @@ -222,17 +217,14 @@ GLFS_MSGID( #define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present" #define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed" #define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup" -#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed" #define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed" #define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping" #define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout" #define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed" #define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed" -#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started" #define DHT_MSG_FIX_NOT_COMP_STR \ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \ "complete" -#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr" #define DHT_MSG_SUBVOL_DETER_FAILED_STR \ "local subvolume determination failed with error" #define DHT_MSG_LOCAL_SUBVOL_STR "local subvol" @@ -248,8 +240,6 @@ GLFS_MSGID( #define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \ "Failed to initialise migration queue" #define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance" -#define DHT_MSG_TIER_RESUME_STR "Pause end. Resume tiering" -#define DHT_MSG_TIER_PAUSED_STR "Pause tiering" #define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout" #define DHT_MSG_WOKE_STR "woken" #define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance" diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index d850eef62ab..8ba8082bd86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -14,7 +14,6 @@ #include <signal.h> #include <glusterfs/events.h> #include "glusterfs/compat-errno.h" // for ENODATA on BSD -#include <string.h> #define GF_DISK_SECTOR_SIZE 512 #define DHT_REBALANCE_PID 4242 /* Change it if required */ @@ -610,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - if (!!dht_is_tier_xlator(this)) { - xdata = dict_new(); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", - loc->path); - goto out; - } + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed)", loc->path); + goto out; + } - ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", loc->path, - GF_CLEAN_WRITE_PROTECTION); - goto out; - } + ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; } ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); @@ -1097,7 +1093,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, break; } - if (!conf->force_migration && !dht_is_tier_xlator(this)) { + if (!conf->force_migration) { if (!xdata) { xdata = dict_new(); if (!xdata) { @@ -1537,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* If defrag is NULL, it should be assumed that migration is triggered - * from client using the trusted.distribute.migrate-data virtual xattr - */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator(this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - gf_log(this->name, log_level, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -2301,14 +2282,12 @@ out: } } - if (!dht_is_tier_xlator(this)) { - lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, + NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } if (dict) @@ -2895,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque) if (defrag->stats == _gf_true) { gettimeofday(&end, NULL); - elapsed = (end.tv_sec - start.tv_sec) * 1e6 + - (end.tv_usec - start.tv_usec); + elapsed = gf_tvdiff(&start, &end); gf_log(this->name, GF_LOG_INFO, "Migration of " "file:%s size:%" PRIu64 @@ -3075,7 +3053,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i, dht_conf_t *conf, gf_defrag_info_t *defrag, fd_t *fd, dict_t *migrate_data, struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) + int *perrno) { int ret = 0; char is_linkfile = 0; @@ -3279,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int dfc_index = 0; int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; + xlator_t *old_THIS = NULL; gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); gettimeofday(&dir_start, NULL); @@ -3292,6 +3270,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } + old_THIS = THIS; + THIS = this; + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); if (!dir_dfmeta) { gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); @@ -3456,7 +3437,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, defrag, dir_dfmeta->lfd[dfc_index], migrate_data, dir_dfmeta, xattr_req, - &should_commit_hash, perrno); + perrno); if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { goto out; @@ -3500,24 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } gettimeofday(&end, NULL); - elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + - (end.tv_usec - dir_start.tv_usec); + elapsed = gf_tvdiff(&dir_start, &end); gf_log(this->name, GF_LOG_INFO, "Migration operation on dir %s took " "%.2f secs", loc->path, elapsed / 1e6); ret = 0; out: - + THIS = old_THIS; gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); if (xattr_req) dict_unref(xattr_req); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - /* It does not matter if it errored out - this number is * used to calculate rebalance estimated time to complete. * No locking required as dirs are processed by a single thread. @@ -3525,6 +3501,7 @@ out: defrag->num_dirs_processed++; return ret; } + int gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout) @@ -3539,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, * rebalance is complete. */ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || - defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { return 0; } @@ -3585,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, return 0; } -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. This is not performant, adding more time to - * fixlayout. The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ -int -gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, - gf_dirent_t *file_dentry) -{ - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - - GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); - goto out; - } - - conf = this->private; - - loc_wipe(&file_loc); - - if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null(parent_loc->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", - parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - - ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new(); - if (!lookup_xdata) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", file_dentry->d_name); - goto out; - } - - ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; - } - - gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, - lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; - -out: - - loc_wipe(&file_loc); - - if (lookup_xdata) - dict_unref(lookup_xdata); - - return ret; -} - int gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3712,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; - int should_commit_hash = 1; int perrno = 0; conf = this->private; @@ -3815,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; if (!IA_ISDIR(entry->d_stat.ia_type)) { - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. - * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup(this, loc, entry); - } - continue; } loc_wipe(&entry_loc); @@ -3841,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } else { - should_commit_hash = 0; - continue; } } @@ -3905,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } else { - should_commit_hash = 0; continue; } } @@ -3923,7 +3777,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - if (ret && ret != 2) { + if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, "Fix layout failed for %s", entry_loc.path); @@ -3990,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - if (ret && (ret != 2)) { + if (ret) { if (perrno == ENOENT || perrno == ESTALE) { ret = 0; goto out; @@ -4010,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (conf->decommission_in_progress) { goto out; } - - should_commit_hash = 0; } - } else if (ret == 2) { - should_commit_hash = 0; } } gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { defrag->total_failures++; gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, @@ -4045,372 +3893,6 @@ out: if (fd) fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - - return ret; -} - -int -gf_defrag_fix_layout_puredist(xlator_t *this, gf_defrag_info_t *defrag, - loc_t *loc, dict_t *fix_layout, - dict_t *migrate_data) -{ - int ret = -1; - loc_t entry_loc = { - 0, - }; - fd_t *fd = NULL; - inode_t *linked_inode = NULL, *inode = NULL; - dht_conf_t *conf = NULL; - int should_commit_hash = 1; - int perrno = 0; - /* absolute brick path length */ - int brick_len = 0; - /* dir path length (relative to gluster mount) */ - int dir_len = 0; - /* absolute dir path length */ - int total_len = 0; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{ - 0, - }}; - DIR *dirp = NULL; - int full_entry_length = 0; - int entry_len = 0; - char full_entry_path[4096] = { - 0, - }; - char full_dir_path[4096] = { - 0, - }; - ssize_t size = 0; - uuid_t tmp_gfid; - struct stat tmpbuf = { - 0, - }; - struct iatt iatt = { - 0, - }; - - struct stat lstatbuf = { - 0, - }; - struct iatt stbuf = { - 0, - }; - - conf = this->private; - if (!conf) { - ret = -1; - goto out; - } - - /* - * Since the primary target for the following lookup is to figure out if the - * entry still exists, going to do a direct stat call rather than going - * through the whole gluster stack. There are some benefits of doing gluster - * lookup, but this is redundant since we have done already one gluster - * lookup in the parent function. - * - * Randomly selecting the first local subvol to read, since it is expected - * that the directory structure is present in all the subvols identically - */ - - brick_len = strlen(defrag->local_brick_paths[0]); - /* discarding the first "/" */ - dir_len = strlen(loc->path) - 1; - /* Extra two: one for "/" at the end and one more for '\0'*/ - total_len = brick_len + dir_len + 2; - - snprintf(full_dir_path, total_len, "%s%s/", defrag->local_brick_paths[0], - loc->path + 1); - - ret = sys_lstat(full_dir_path, &tmpbuf); - if (ret == -1) { - gf_log(this->name, GF_LOG_ERROR, - "[absolutepath %s] directory " - "not found, path %s error %d", - full_dir_path, loc->path, errno); - goto out; - } - - dirp = sys_opendir(full_dir_path); - if (!dirp) { - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, errno, 0, "failed to open dir : %s", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - goto out; - } - - while ((entry = sys_readdir(dirp, scratch)) != NULL) { - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = 1; - goto out; - } - if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") || - !strcmp(entry->d_name, ".glusterfs")) - continue; - - /* TODO: Need to add a check for _DIRENT_HAVE_D_TYPE flag to fall back - to stat in case d_type is not defined */ - if (entry->d_type != DT_DIR) { - continue; - } - - entry_len = strlen(entry->d_name); - full_entry_length = total_len + entry_len + 1; /* one more for "/"*/ - - snprintf(full_entry_path, full_entry_length, "%s%s/", full_dir_path, - entry->d_name); - - size = sys_lgetxattr(full_entry_path, GFID_XATTR_KEY, tmp_gfid, 16); - if (size != 16) { - gf_log(this->name, GF_LOG_ERROR, "gfid not found, path %s", - full_entry_path); - continue; - } - - loc_wipe(&entry_loc); - - ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, - "Child loc" - " build failed for entry: %s", - entry->d_name); - - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - - goto out; - } else { - should_commit_hash = 0; - - continue; - } - } - - if (gf_uuid_is_null(tmp_gfid)) { - gf_log(this->name, GF_LOG_ERROR, - "%s/%s" - " gfid not present", - loc->path, entry->d_name); - continue; - } - - gf_uuid_copy(entry_loc.gfid, tmp_gfid); - - /*In case the gfid stored in the inode by inode_link - *and the gfid obtained in the lookup differs, then - *client3_3_lookup_cbk will return ESTALE and proper - *error will be captured. - */ - memset(&lstatbuf, 0, sizeof(struct stat)); - ret = sys_lstat(full_entry_path, &lstatbuf); - if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, 0, "lstat failed for %s", - entry->d_name); - } - - memset(&stbuf, 0, sizeof(struct iatt)); - iatt_from_stat(&stbuf, &lstatbuf); - gf_uuid_copy(stbuf.ia_gfid, entry_loc.gfid); - linked_inode = inode_link(entry_loc.inode, loc->inode, entry->d_name, - &stbuf); - - inode = entry_loc.inode; - entry_loc.inode = linked_inode; - inode_unref(inode); - - if (gf_uuid_is_null(loc->gfid)) { - gf_log(this->name, GF_LOG_ERROR, - "%s/%s" - " gfid not present", - loc->path, entry->d_name); - continue; - } - - gf_uuid_copy(entry_loc.pargfid, loc->gfid); - - ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED, - "Dir:%s renamed or removed. " - "Skipping", - loc->path); - ret = 0; - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - continue; - } else { - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s", - entry_loc.path); - - defrag->total_failures++; - - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } else { - should_commit_hash = 0; - continue; - } - } - } - - /* A return value of 2 means, either process_dir or - * lookup of a dir failed. Hence, don't commit hash - * for the current directory*/ - - ret = gf_defrag_fix_layout_puredist(this, defrag, &entry_loc, - fix_layout, migrate_data); - - if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { - goto out; - } - - if (ret && ret != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, - "Fix layout failed for %s", entry_loc.path); - - defrag->total_failures++; - - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - - goto out; - } else { - /* Let's not commit-hash if - * gf_defrag_fix_layout failed*/ - continue; - } - } - } - - ret = sys_closedir(dirp); - if (ret) { - gf_msg_debug(this->name, 0, - "Failed to close dir %s. Reason :" - " %s", - full_dir_path, strerror(errno)); - ret = 0; - } - - dirp = NULL; - - /* A directory layout is fixed only after its subdirs are healed to - * any newly added bricks. If the layout is fixed before subdirs are - * healed, the newly added brick will get a non-null layout. - * Any subdirs which hash to that layout will no longer show up - * in a directory listing until they are healed. - */ - - ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); - - /* In case of a race where the directory is deleted just before - * layout setxattr, the errors are updated in the layout structure. - * We can use this information to make a decision whether the directory - * is deleted entirely. - */ - if (ret == 0) { - ret = dht_dir_layout_error_check(this, loc->inode); - ret = -ret; - } - - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed. Dir %s " - "renamed or removed", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } else { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed for %s", loc->path); - - defrag->total_failures++; - - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } - } - } - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { - ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - - if (ret && (ret != 2)) { - if (perrno == ENOENT || perrno == ESTALE) { - ret = 0; - goto out; - } else { - defrag->total_failures++; - - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, - "gf_defrag_process_dir failed for " - "directory: %s", - loc->path); - - if (conf->decommission_in_progress) { - goto out; - } - - should_commit_hash = 0; - } - } else if (ret == 2) { - should_commit_hash = 0; - } - } - - gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { - defrag->total_failures++; - - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, - "Settle hash failed for %s", loc->path); - - ret = -1; - - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto out; - } - } - - ret = 0; -out: - loc_wipe(&entry_loc); - - if (fd) - fd_unref(fd); - - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - - if (dirp) { - sys_closedir(dirp); - } - return ret; } @@ -4419,31 +3901,26 @@ dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; uuid_t *uuid_ptr = NULL; int ret = -1; int i = 0; int j = 0; - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, - "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL, + NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } + if (!ret) + goto out; + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, NULL, NULL); if (ret) { @@ -4534,9 +4011,6 @@ dht_file_counter_thread(void *args) struct timespec time_to_wait = { 0, }; - struct timeval now = { - 0, - }; uint64_t tmp_size = 0; if (!args) @@ -4546,9 +4020,8 @@ dht_file_counter_thread(void *args) dht_build_root_loc(defrag->root_inode, &root_loc); while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { - gettimeofday(&now, NULL); - time_to_wait.tv_sec = now.tv_sec + 600; - time_to_wait.tv_nsec = 0; + timespec_now(&time_to_wait); + time_to_wait.tv_sec += 600; pthread_mutex_lock(&defrag->fc_mutex); pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex, @@ -4621,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) goto out; } - ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread, (void *)defrag, "dhtfcnt"); if (ret) { @@ -4678,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, /*Spawn Threads Here*/ while (index < thread_spawn_count) { - ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task, (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff); if (ret != 0) { gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ", @@ -4768,7 +4241,6 @@ gf_defrag_start_crawl(void *data) pthread_t *tid = NULL; pthread_t filecnt_thread; gf_boolean_t fc_thread_started = _gf_false; - int i = 0; this = data; if (!this) @@ -4786,7 +4258,8 @@ gf_defrag_start_crawl(void *data) if (!defrag) goto exit; - gettimeofday(&defrag->start_time, NULL); + defrag->start_time = gf_time(); + dht_build_root_inode(this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -4903,12 +4376,6 @@ gf_defrag_start_crawl(void *data) goto out; } - ret = dht_get_brick_paths(this, conf, &loc); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, "could not get brick path"); - ret = 0; - } - /* Initialise the structures required for parallel migration */ ret = gf_defrag_parallel_migration_init(this, defrag, &tid, &thread_index); @@ -4926,27 +4393,14 @@ gf_defrag_start_crawl(void *data) } } - /* TODO: Need to introduce a flag to safely operate in the old way */ - if (defrag->operate_dist && defrag->is_pure_distribute) { - ret = gf_defrag_fix_layout_puredist(this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } - } else { - ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } + ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data); + if (ret) { + defrag->total_failures++; + ret = -1; + goto out; } - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { defrag->total_failures++; ret = -1; goto out; @@ -4988,14 +4442,6 @@ out: } UNLOCK(&defrag->lock); - for (i = 0; i < conf->local_subvols_cnt; i++) { - if (defrag->local_brick_paths[i]) { - GF_FREE(defrag->local_brick_paths[i]); - } - } - - GF_FREE(defrag->local_brick_paths); - GF_FREE(defrag); conf->defrag = NULL; @@ -5069,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) uint64_t total_processed = 0; uint64_t tmp_count = 0; uint64_t time_to_complete = 0; - struct timeval now = { - 0, - }; double elapsed = 0; defrag = conf->defrag; @@ -5079,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) if (!g_totalsize) goto out; - gettimeofday(&now, NULL); - elapsed = now.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* Don't calculate the estimates for the first 10 minutes. * It is unlikely to be accurate and estimates are not required @@ -5130,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; char *status = ""; double elapsed = 0; - struct timeval end = { - 0, - }; uint64_t time_to_complete = 0; uint64_t time_left = 0; gf_defrag_info_t *defrag = conf->defrag; @@ -5153,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; - gettimeofday(&end, NULL); - - elapsed = end.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* The rebalance is still in progress */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { time_to_complete = gf_defrag_get_estimates_based_on_size(conf); if (time_to_complete && (time_to_complete > elapsed)) @@ -5178,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) if (!dict) goto log; - ret = dict_set_uint64(dict, "promoted", promoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); - - ret = dict_set_uint64(dict, "demoted", demoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); - ret = dict_set_uint64(dict, "files", files); if (ret) gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 1b6571cd43c..3e24065227c 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, local->call_cnt = conf->subvolume_cnt; if (op_ret < 0) { - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. - */ if (op_errno == EINVAL) { local->call_cnt = 1; dht_selfheal_dir_mkdir_lookup_done(frame, this); @@ -1330,9 +1326,11 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, int ret = -1; dht_local_t *local = NULL; xlator_t *this = NULL; + dht_conf_t *conf = NULL; local = frame->local; this = frame->this; + conf = this->private; local->selfheal.force_mkdir = force; local->selfheal.hole_cnt = 0; @@ -1372,15 +1370,44 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, return 0; } - if (local->hashed_subvol == NULL) - local->hashed_subvol = dht_subvol_get_hashed(this, loc); + /* MDS xattr is populated only while DHT is having more than one + subvol.In case of graph switch while adding more dht subvols need to + consider hash subvol as a MDS to avoid MDS check failure at the time + of running fop on directory + */ + if (!dict_get(local->xattr, conf->mds_xattr_key) && + (conf->subvolume_cnt > 1)) { + if (local->hashed_subvol == NULL) { + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", + loc->pargfid, "name=%s", loc->name, "path=%s", + loc->path, NULL); + goto err; + } + } + ret = dht_inode_ctx_mdsvol_set(local->inode, this, + local->hashed_subvol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s on inode vol is %s", + local->loc.path, + local->hashed_subvol ? local->hashed_subvol->name : "NULL"); + goto err; + } + } if (local->hashed_subvol == NULL) { - local->op_errno = EINVAL; - gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, - "name=%s", loc->name, "path=%s", loc->path, NULL); - goto err; + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, + "name=%s", loc->name, "path=%s", loc->path, NULL); + goto err; + } } local->current = &local->lock[0]; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 811bb55925f..bb72b0ffbb5 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -140,9 +140,9 @@ dht_priv_dump(xlator_t *this) } } - if (conf->last_stat_fetch.tv_sec) + if (conf->last_stat_fetch) gf_proc_dump_write("last_stat_fetch", "%s", - ctime(&conf->last_stat_fetch.tv_sec)); + ctime(&conf->last_stat_fetch)); UNLOCK(&conf->subvolume_lock); @@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, pattern_str = strtok_r(data, ",", &tmp_str); while (pattern_str) { dup_str = gf_strdup(pattern_str); + if (!dup_str) + goto out; pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); if (!pattern_list) { goto out; @@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this) methods = &(conf->methods); methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; methods->migration_other = NULL; methods->layout_search = dht_layout_search; @@ -700,10 +701,6 @@ dht_init(xlator_t *this) pthread_cond_init(&defrag->fc_wakeup_cond, 0); defrag->global_error = 0; - - defrag->is_pure_distribute = _gf_false; - - defrag->operate_dist = _gf_true; } conf->use_fallocate = 1; @@ -1049,84 +1046,6 @@ struct volume_options dht_options[] = { /* NUFA option */ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, - /* tier options */ - { - .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { - .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { - .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { - .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - - { - .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "90", - }, - { - .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { - .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { - .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - {.key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system"}, - {.key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system"}, - { - .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { - .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { - .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, /* switch option */ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 59313639c45..3648a564840 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -595,7 +595,6 @@ nufa_init(xlator_t *this) dht_methods_t dht_methods = { .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, .layout_search = dht_layout_search, }; diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 9abdcec3f78..b955efd8c2d 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop) } } - gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, - "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s, %s)", - gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, - ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), - ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), - ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), - ec_bin(str4, sizeof(str4), fop->good, ec->nodes), - ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), - ec->nodes), - ec_msg_str(fop)); + gf_msg( + fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " + "remaining=%s, good=%s, bad=%s," + "(Least significant bit represents first client/brick of subvol), %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), + ec->nodes), + ec_msg_str(fop)); if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, @@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop) loc_t *loc2 = NULL; char gfid1[64] = {0}; char gfid2[64] = {0}; + ec_fop_data_t *parent = fop->parent; if (fop->errstr) return fop->errstr; - if (!fop->use_fd) { loc1 = &fop->loc[0]; loc2 = &fop->loc[1]; @@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop) if (fop->id == GF_FOP_RENAME) { gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' and '%s' with gfids " - "%s and %s respectively", + "%s and %s respectively. Parent FOP: %s", ec_fop_name(fop->id), loc1->path, loc2->path, uuid_utoa_r(loc1->gfid, gfid1), - uuid_utoa_r(loc2->gfid, gfid2)); + uuid_utoa_r(loc2->gfid, gfid2), + parent ? ec_fop_name(parent->id) : "No Parent"); } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", - ec_fop_name(fop->id), loc1->path, - uuid_utoa_r(loc1->gfid, gfid1)); + gf_asprintf( + &fop->errstr, + "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), loc1->path, + uuid_utoa_r(loc1->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", - ec_fop_name(fop->id), - uuid_utoa_r(fop->fd->inode->gfid, gfid1)); + gf_asprintf( + &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } return fop->errstr; } +static void +ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need, + int32_t loglevel) +{ + ec_t *ec = fop->xl->private; + char str1[32], str2[32], str3[32]; + + gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children for this request: " + "Have : %d, Need : %u : Child UP : %s " + "Mask: %s, Healing : %s : %s ", + have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->healing, ec->nodes), + ec_msg_str(fop)); +} + static int32_t ec_child_select(ec_fop_data_t *fop) { @@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop) ec_trace("SELECT", fop, ""); if ((num < fop->minimum) && (num < ec->fragments)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, fop->minimum, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR); return 0; } @@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop) (fop->locks[0].update[EC_DATA_TXN] || fop->locks[0].update[EC_METADATA_TXN])) { if (ec->quorum_count && (num < ec->quorum_count)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, ec->quorum_count, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR); return 0; } } diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 89a433e5b91..7d991f04aac 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -2498,7 +2498,7 @@ out: } int -ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) +ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) { int i = 0; int ret = 0; @@ -2528,7 +2528,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) xattr[i] = dict; on[i] = 1; } - dirty_xattr[EC_METADATA_TXN] = hton64(1); ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); if (ret < 0) { @@ -2629,13 +2628,11 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, "Index entry needs to be purged for: %s ", uuid_utoa(loc->gfid)); - /* We need to send xattrop to set dirty flag so that it can be - * healed and index entry could be removed. We need not to take lock - * on this entry to do so as we are just setting dirty flag which - * actually increases the trusted.ec.dirty count and does not set - * the new value. - * This will make sure that it is not interfering in other fops.*/ - ec_heal_set_dirty_without_lock(frame, ec, loc->inode); + /* We need to send zero-xattrop so that stale index entry could be + * removed. We need not take lock on this entry to do so as + * xattrop on a brick is atomic. */ + ec_heal_purge_stale_index(frame, ec, loc->inode); + goto out; } else if (need_heal == EC_HEAL_NONEED) { gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, "Heal is not required for : %s ", uuid_utoa(loc->gfid)); diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index 63fe5d34e38..5c1586bc9c5 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer) ec = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + ec->shd.timeout; + wait_till.tv_sec = gf_time() + ec->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -156,15 +156,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) return ret; } +static gf_boolean_t +ec_is_heal_completed(char *status) +{ + char *bad_pos = NULL; + char *zero_pos = NULL; + + if (!status) { + return _gf_false; + } + + /*Logic: + * Status will be of the form Good: <binary>, Bad: <binary> + * If heal completes, if we do strchr for '0' it should be present after + * 'Bad:' i.e. strRchr for ':' + * */ + + zero_pos = strchr(status, '0'); + bad_pos = strrchr(status, ':'); + if (!zero_pos || !bad_pos) { + /*malformed status*/ + return _gf_false; + } + + if (zero_pos > bad_pos) { + return _gf_true; + } + + return _gf_false; +} + int ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, gf_boolean_t full) { dict_t *xdata = NULL; + dict_t *dict = NULL; uint32_t count; int32_t ret; + char *heal_status = NULL; + ec_t *ec = healer->this->private; + + GF_ATOMIC_INC(ec->stats.shd.attempted); + ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL, + &xdata); + if (ret == 0) { + if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) { + if (ec_is_heal_completed(heal_status)) { + GF_ATOMIC_INC(ec->stats.shd.completed); + } + } + } - ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata); if (!full && (loc->inode->ia_type == IA_IFDIR)) { /* If we have just healed a directory, it's possible that * other index entries have appeared to be healed. */ @@ -183,6 +226,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, dict_unref(xdata); } + if (dict) { + dict_unref(dict); + } + return ret; } diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 90da328e641..de9b89bb2c9 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -626,6 +626,11 @@ struct _ec_statistics { requests. (Basically memory allocation errors). */ } stripe_cache; + struct { + gf_atomic_t attempted; /*Number of heals attempted on + files/directories*/ + gf_atomic_t completed; /*Number of heals complted on files/directories*/ + } shd; }; struct _ec { diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 66b4e634911..7344be4968d 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec) void ec_up(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 1; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, + "Going UP : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); } @@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec) void ec_down(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 0; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, + "Going DOWN : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); } @@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec) GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); + GF_ATOMIC_INIT(ec->stats.shd.attempted, 0); + GF_ATOMIC_INIT(ec->stats.shd.completed, 0); } static int @@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this) GF_ATOMIC_GET(ec->stats.stripe_cache.allocs)); gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(ec->stats.stripe_cache.errors)); + gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.attempted)); + gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.completed)); return 0; } diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c index 0158e8d8546..d45655ef4c3 100644 --- a/xlators/debug/error-gen/src/error-gen.c +++ b/xlators/debug/error-gen/src/error-gen.c @@ -1507,8 +1507,8 @@ init(xlator_t *this) this->private = pvt; - /* Give some seed value here */ - srand(time(NULL)); + /* Give some seed value here. */ + srand(gf_time()); ret = 0; out: diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 345dbe7e09c..aa00c446e5a 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -135,7 +135,7 @@ struct ios_global_stats { gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE]; gf_atomic_t fop_hits[GF_FOP_MAXVALUE]; gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE]; - struct timeval started_at; + time_t started_at; struct ios_lat latency[GF_FOP_MAXVALUE]; uint64_t nr_opens; uint64_t max_nr_opens; @@ -292,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame) begin = &frame->begin; \ end = &frame->end; \ \ - elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \ - (end->tv_nsec - begin->tv_nsec)) / \ - 1000; \ + elapsed = gf_tsdiff(begin, end) / 1000.0; \ throughput = op_ret / elapsed; \ \ conf = this->private; \ @@ -767,9 +765,8 @@ err: int io_stats_dump_global_to_json_logfp(xlator_t *this, - struct ios_global_stats *stats, - struct timeval *now, int interval, - FILE *logfp) + struct ios_global_stats *stats, time_t now, + int interval, FILE *logfp) { int i = 0; int j = 0; @@ -795,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this, }; dict_t *xattr = NULL; - interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) - - (stats->started_at.tv_sec * 1000000.0 + - stats->started_at.tv_usec)) / - 1000000.0; + interval_sec = (double)(now - stats->started_at); conf = this->private; @@ -950,8 +944,8 @@ io_stats_dump_global_to_json_logfp(xlator_t *this, } if (interval == -1) { - ios_log(this, logfp, "\"%s.%s.uptime\": %" PRId64 ",", key_prefix, - str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec)); + ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix, + str_prefix, (uint64_t)(now - stats->started_at)); ios_log(this, logfp, "\"%s.%s.bytes_read\": " "%" GF_PRI_ATOMIC ",", @@ -1203,7 +1197,7 @@ out: int io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, FILE *logfp) + time_t now, int interval, FILE *logfp) { int i = 0; int per_line = 0; @@ -1226,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, ios_log(this, logfp, "\n=== Cumulative stats ==="); else ios_log(this, logfp, "\n=== Interval %d stats ===", interval); - ios_log(this, logfp, " Duration : %" PRId64 " secs", - (uint64_t)(now->tv_sec - stats->started_at.tv_sec)); + ios_log(this, logfp, " Duration : %" PRIu64 " secs", + (uint64_t)(now - stats->started_at)); ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC, GF_ATOMIC_GET(stats->data_read)); ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n", @@ -1372,7 +1366,7 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, int io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, dict_t *dict) + time_t now, int interval, dict_t *dict) { int ret = 0; char key[64] = {0}; @@ -1398,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats, interval); snprintf(key, sizeof(key), "%d-duration", interval); - sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec); + sec = now - stats->started_at; ret = dict_set_uint64(dict, key, sec); if (ret) { gf_log(this->name, GF_LOG_ERROR, @@ -1521,9 +1515,8 @@ out: } int -io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, - struct ios_dump_args *args) +io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now, + int interval, struct ios_dump_args *args) { int ret = -1; @@ -1581,13 +1574,13 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type, } static void -ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now) +ios_global_stats_clear(struct ios_global_stats *stats, time_t now) { GF_ASSERT(stats); GF_ASSERT(now); memset(stats, 0, sizeof(*stats)); - stats->started_at = *now; + stats->started_at = now; } int @@ -1598,7 +1591,7 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, struct ios_global_stats cumulative = {}; struct ios_global_stats incremental = {}; int increment = 0; - struct timeval now; + time_t now = 0; GF_ASSERT(this); GF_ASSERT(args); @@ -1606,8 +1599,8 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX); conf = this->private; + now = gf_time(); - gettimeofday(&now, NULL); LOCK(&conf->lock); { if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE) @@ -1620,17 +1613,17 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, if (!is_peek) { increment = conf->increment++; - ios_global_stats_clear(&conf->incremental, &now); + ios_global_stats_clear(&conf->incremental, now); } } } UNLOCK(&conf->lock); if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE) - io_stats_dump_global(this, &cumulative, &now, -1, args); + io_stats_dump_global(this, &cumulative, now, -1, args); if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL) - io_stats_dump_global(this, &incremental, &now, increment, args); + io_stats_dump_global(this, &incremental, now, increment, args); return 0; } @@ -1640,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd) { struct ios_conf *conf = NULL; struct timeval now; - uint64_t sec = 0; - uint64_t usec = 0; int i = 0; + double usecs = 0; uint64_t data_read = 0; uint64_t data_written = 0; uint64_t block_count_read = 0; @@ -1657,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd) return 0; gettimeofday(&now, NULL); - - if (iosfd->opened_at.tv_usec > now.tv_usec) { - now.tv_usec += 1000000; - now.tv_usec--; - } - - sec = now.tv_sec - iosfd->opened_at.tv_sec; - usec = now.tv_usec - iosfd->opened_at.tv_usec; + usecs = gf_tvdiff(&iosfd->opened_at, &now); gf_log(this->name, GF_LOG_INFO, "--- fd stats ---"); if (iosfd->filename) gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename); - if (sec) - gf_log(this->name, GF_LOG_INFO, - " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec); + if (usecs) + gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs); data_read = GF_ATOMIC_GET(iosfd->data_read); if (data_read) @@ -1776,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame, begin = &frame->begin; end = &frame->end; - elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + - (end->tv_nsec - begin->tv_nsec)) / - 1000; + elapsed = gf_tsdiff(begin, end) / 1000.0; update_ios_latency_stats(&conf->cumulative, elapsed, op); update_ios_latency_stats(&conf->incremental, elapsed, op); @@ -3592,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf) return; } -static int +static void io_stats_clear(struct ios_conf *conf) { - struct timeval now; - int ret = -1; + time_t now = 0; GF_ASSERT(conf); + now = gf_time(); - if (!gettimeofday(&now, NULL)) { - LOCK(&conf->lock); - { - ios_global_stats_clear(&conf->cumulative, &now); - ios_global_stats_clear(&conf->incremental, &now); - conf->increment = 0; - } - UNLOCK(&conf->lock); - ret = 0; + LOCK(&conf->lock); + { + ios_global_stats_clear(&conf->cumulative, now); + ios_global_stats_clear(&conf->incremental, now); + conf->increment = 0; } - - return ret; + UNLOCK(&conf->lock); } int32_t @@ -3852,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf) _ios_destroy_dump_thread(conf); ios_destroy_sample_buf(conf->ios_sample_buf); LOCK_DESTROY(&conf->lock); - GF_FREE(conf->dnscache); + gf_dnscache_deinit(conf->dnscache); GF_FREE(conf); } @@ -3875,7 +3852,7 @@ ios_init_stats(struct ios_global_stats *stats) for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) GF_ATOMIC_INIT(stats->upcall_hits[i], 0); - gettimeofday(&stats->started_at, NULL); + stats->started_at = gf_time(); } int @@ -3964,11 +3941,14 @@ init(xlator_t *this) gf_log(this->name, GF_LOG_ERROR, "Out of memory."); goto out; } - ret = -1; GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32, out); conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec); + if (!conf->dnscache) { + ret = -1; + goto out; + } GF_OPTION_INIT("sys-log-level", sys_log_str, str, out); if (sys_log_str) { @@ -4119,12 +4099,9 @@ notify(xlator_t *this, int32_t event, void *data, ...) } if (GF_IOS_INFO_CLEAR == op) { - ret = io_stats_clear(this->private); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "Failed to clear info stats"); + io_stats_clear(this->private); - ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1); + ret = dict_set_int32(output, "stats-cleared", 1); if (ret) gf_log(this->name, GF_LOG_ERROR, "Failed to set stats-cleared" diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 194634b003d..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC CLOUDSYNC_DIR = cloudsync endif +if BUILD_METADISP + METADISP_DIR = metadisp +endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ compress changelog gfid-access snapview-client snapview-server trash \ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ - utime + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c index 34e20f9df11..5cef2ffa5e5 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat) } void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv) +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time) { if (!scrub_stat) return; pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_start_time = time; } pthread_mutex_unlock(&scrub_stat->lock); } void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv) + time_t time) { int lst_size = 0; @@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_end_time = time; - scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec - - scrub_stat->scrub_start_tv.tv_sec; + scrub_stat->scrub_duration = scrub_stat->scrub_end_time - + scrub_stat->scrub_start_time; snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h index 24128b90a66..f022aa831eb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -15,20 +15,22 @@ #include <sys/time.h> #include <pthread.h> +#include <glusterfs/common-utils.h> + struct br_scrub_stats { - uint64_t scrubbed_files; /* Total number of scrubbed file */ + uint64_t scrubbed_files; /* Total number of scrubbed files. */ - uint64_t unsigned_files; /* Total number of unsigned file */ + uint64_t unsigned_files; /* Total number of unsigned files. */ - uint64_t scrub_duration; /* Duration of last scrub */ + uint64_t scrub_duration; /* Duration of last scrub. */ - char last_scrub_time[1024]; /*last scrub completion time */ + char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */ - struct timeval scrub_start_tv; /* Scrubbing starting time*/ + time_t scrub_start_time; /* Scrubbing starting time. */ - struct timeval scrub_end_tv; /* Scrubbing finishing time */ + time_t scrub_end_time; /* Scrubbing finishing time. */ - int8_t scrub_running; /* Scrub running or not */ + int8_t scrub_running; /* Whether scrub running or not. */ pthread_mutex_t lock; }; @@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat); void br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat); void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv); +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time); void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv); + time_t time); #endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index c4607654365..289dd53f610 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -604,22 +604,20 @@ br_scrubber_log_time(xlator_t *this, const char *sfx) char timestr[GF_TIMESTR_SIZE] = { 0, }; - struct timeval tv = { - 0, - }; br_private_t *priv = NULL; + time_t now = 0; + now = gf_time(); priv = this->private; - gettimeofday(&tv, NULL); - gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { - br_update_scrub_start_time(&priv->scrub_stat, &tv); + br_update_scrub_start_time(&priv->scrub_stat, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, "Scrubbing %s at %s", sfx, timestr); } else { - br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv); + br_update_scrub_finish_time(&priv->scrub_stat, timestr, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, "Scrubbing %s at %s", sfx, timestr); } @@ -631,12 +629,10 @@ br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx) char timestr[GF_TIMESTR_SIZE] = { 0, }; - struct timeval tv = { - 0, - }; + time_t now = 0; - gettimeofday(&tv, NULL); - gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + now = gf_time(); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", @@ -919,9 +915,6 @@ br_fsscan_schedule(xlator_t *this) { uint32_t timo = 0; br_private_t *priv = NULL; - struct timeval tv = { - 0, - }; char timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&tv, NULL); - scrub_monitor->boot = tv.tv_sec; + scrub_monitor->boot = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { @@ -978,9 +970,7 @@ br_fsscan_activate(xlator_t *this) char timestr[GF_TIMESTR_SIZE] = { 0, }; - struct timeval now = { - 0, - }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this) } pthread_mutex_unlock(&scrub_monitor->donelock); - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); (void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo); _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); @@ -1023,9 +1013,7 @@ br_fsscan_reschedule(xlator_t *this) char timestr[GF_TIMESTR_SIZE] = { 0, }; - struct timeval now = { - 0, - }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this) if (!fsscrub->frequency_reconf) return 0; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this) return -1; } - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1076,20 +1064,16 @@ br_fsscan_ondemand(xlator_t *this) char timestr[GF_TIMESTR_SIZE] = { 0, }; - struct timeval now = { - 0, - }; + time_t now = 0; br_private_t *priv = NULL; struct br_monitor *scrub_monitor = NULL; priv = this->private; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); - + now = gf_time(); timo = BR_SCRUB_ONDEMAND; - - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 8d2b7f051da..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, - BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED); + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); #define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" #define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" @@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, "daemon. Unwinding the fop" #define BRS_MSG_SIGN_PREPARE_FAIL_STR \ "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. Unwinding the fop" #define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" #define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" #define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 605a5e4c3e4..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int @@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - -dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -1025,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 71fe1f032a0..e561997d858 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len) } int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer) +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) { char changelog_path[PATH_MAX + 1] = { 0, @@ -273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, goto out; } - len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts, + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, priv->rollover_count); if (len >= sizeof(x_value)) { ret = -1; @@ -382,8 +381,7 @@ out: } static int -changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, - unsigned long ts) +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ret = -1; int notify = 0; @@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, priv->changelog_fd = -1; } - time_t time = (time_t)ts; - - /* Get GMT time */ - gmt = gmtime(&time); + /* Get GMT time. */ + gmt = gmtime(&ts); strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, priv->changelog_dir); - (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu", + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", priv->changelog_dir, yyyymmdd, ts); (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); @@ -593,7 +589,7 @@ out: * returns -1 on failure or error */ int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -723,7 +719,7 @@ out: * returns -1 on failure or error */ int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) int32_t len = 0; gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, - "name=%lu", ts, NULL); + "name=%ld", ts, NULL); CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); /* get the htime file name in ht_file_path */ - len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path, + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, HTIME_FILE_NAME, ts); if ((len < 0) || (len >= PATH_MAX)) { ret = -1; @@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) goto out; } - (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu", + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", HTIME_FILE_NAME, ts); if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, strlen(ht_file_bname), 0)) { @@ -963,8 +959,8 @@ out: } int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale) +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) { int ret = -1; @@ -985,21 +981,12 @@ changelog_entry_length() return sizeof(changelog_log_data_t); } -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) { - struct timeval tv = { - 0, - }; - cld->cld_type = CHANGELOG_TYPE_ROLLOVER; - - if (gettimeofday(&tv, NULL)) - return -1; - - cld->cld_roll_time = (unsigned long)tv.tv_sec; + cld->cld_roll_time = gf_time(); cld->cld_finale = is_last; - return 0; } int @@ -1274,7 +1261,7 @@ changelog_rollover(void *data) while (1) { (void)pthread_testcancel(); - tv.tv_sec = time(NULL) + priv->rollover_time; + tv.tv_sec = gf_time() + priv->rollover_time; tv.tv_nsec = 0; ret = 0; /* Reset ret to zero */ @@ -1355,12 +1342,7 @@ changelog_rollover(void *data) if (priv->explicit_rollover == _gf_true) sleep(1); - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL); - continue; - } + changelog_fill_rollover_data(&cld, _gf_false); _mask_cancellation(); diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 0d06d98c9e1..38fa7590c32 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -31,7 +31,7 @@ */ typedef struct changelog_log_data { /* rollover related */ - unsigned long cld_roll_time; + time_t cld_roll_time; /* reopen changelog? */ gf_boolean_t cld_finale; @@ -97,12 +97,6 @@ struct changelog_encoder { typedef struct changelog_time_slice { /** - * just in case we need nanosecond granularity some day. - * field is unused as of now (maybe we'd need it later). - */ - struct timeval tv_start; - - /** * version of changelog file, incremented each time changes * rollover. */ @@ -423,11 +417,11 @@ changelog_local_t * changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, int xtra_records, gf_boolean_t update_flag); int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale); +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); int changelog_open_journal(xlator_t *this, changelog_priv_t *priv); -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); int changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, @@ -451,12 +445,11 @@ changelog_fsync_thread(void *data); int changelog_forget(xlator_t *this, inode_t *inode); int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer); +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts); int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); /* Geo-Rep snapshot dependency changes */ void diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h index 4dd56b8ee97..cb0e16c85d8 100644 --- a/xlators/features/changelog/src/changelog-messages.h +++ b/xlators/features/changelog/src/changelog-messages.h @@ -59,12 +59,12 @@ GLFS_MSGID( CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, - CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, - CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, @@ -123,8 +123,6 @@ GLFS_MSGID( #define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" #define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" #define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" -#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \ - "failed to fill rollover data" #define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" #define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ "changelog local initialization failed" @@ -144,9 +142,7 @@ GLFS_MSGID( #define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" #define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ "Something went wrong in dict_get_str_boolean" -#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure" #define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" -#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime" #define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" #define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ "translator needs a single subvolume" diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 9a7d158cbf2..6a6e5af859e 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -2252,23 +2252,11 @@ static int changelog_init(xlator_t *this, changelog_priv_t *priv) { int i = 0; - int ret = -1; - struct timeval tv = { - 0, - }; + int ret = 0; changelog_log_data_t cld = { 0, }; - ret = gettimeofday(&tv, NULL); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE, - NULL); - goto out; - } - - priv->slice.tv_start = tv; - priv->maps[CHANGELOG_TYPE_DATA] = "D "; priv->maps[CHANGELOG_TYPE_METADATA] = "M "; priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; @@ -2287,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv) * in case there was an encoding change. so... things are kept * simple here. */ - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, _gf_false); ret = htime_open(this, priv, cld.cld_roll_time); /* call htime open with cld's rollover_time */ @@ -2470,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options) char csnap_dir[PATH_MAX] = { 0, }; - struct timeval tv = { - 0, - }; uint32_t timeout = 0; priv = this->private; @@ -2564,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options) out); if (active_now || active_earlier) { - ret = changelog_fill_rollover_data(&cld, !active_now); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, !active_now); slice = &priv->slice; @@ -2585,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options) if (!active_earlier) { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, NULL); - if (gettimeofday(&tv, NULL)) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL); - ret = -1; - goto out; - } - htime_create(this, priv, tv.tv_sec); + htime_create(this, priv, gf_time()); } ret = changelog_spawn_helper_threads(this, priv); } diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c index 7680260988b..23c3599825a 100644 --- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c @@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, int date_len = -1; int res_len = -1; - ctime = time(NULL); + ctime = gf_time(); gtime = gmtime(&ctime); date_len = strftime(httpdate, sizeof(httpdate), diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 4ece7ff6fc8..4abb2c73ce5 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) worker_enqueue(this, stub); return 0; normal: - ret = dict_get_str(xattr_req, "link-count", &flag); + ret = dict_get_str_sizen(xattr_req, "link-count", &flag); if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xattr_req); @@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...) if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait for draining stub from queue before notify PARENT_DOWN */ diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c index 67fdd53cee2..56dee244281 100644 --- a/xlators/features/leases/src/leases-internal.c +++ b/xlators/features/leases/src/leases-internal.c @@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) } priv = this->private; - recall_time = time(NULL); + recall_time = gf_time(); list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, lease_id_list) { @@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data) gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread"); while (1) { - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&priv->mutex); { if (priv->fini) { diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 1fbbae3541f..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -605,13 +605,11 @@ static void __insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { if (lock->blocked) - gettimeofday(&lock->blkd_time, NULL); + lock->blkd_time = gf_time(); else - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add_tail(&lock->list, &pl_inode->ext_list); - - return; } /* Return true if the locks overlap, false otherwise */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index d5babcc325c..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; @@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, * chance? Or just the locks we are attempting to acquire? */ if (names_conflict(candidate_lock->basename, requested_lock->basename)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -544,14 +542,10 @@ static int __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, pl_entry_lock_t *lock, int nonblock) { - struct timeval now; - if (nonblock) goto out; - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", @@ -612,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, } __pl_entrylk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->domain_list, &dom->entrylk_list); ret = 0; diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index d4a24eb44be..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; /* Question: Should we just prune them all given the * chance? Or just the locks we are attempting to acquire? */ if (inodelk_conflict(candidate_lock, requested_lock)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -397,15 +395,11 @@ static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; - if (can_block == 0) { goto out; } - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); gf_msg_trace(this->name, 0, @@ -466,7 +460,7 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, return __lock_blocked_add(this, dom, lock, can_block); } __pl_inodelk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->list, &dom->inodelk_list); return 0; diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 93c2973b2b3..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -43,9 +43,8 @@ struct __posix_lock { fd_t *fd; call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ /* These two together serve to uniquely identify each process across nodes */ @@ -85,9 +84,9 @@ struct __pl_inode_lock { call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -139,9 +138,9 @@ struct __entry_lock { const char *basename; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index d781cde8969..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, char *save_ptr = NULL; tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + strtok_r(tmp_key, ":", &save_ptr); if (!*save_ptr) { if (tmp_key) @@ -3684,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->entrylk_list, domain_list) { - gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec, + gf_time_fmt(granted, sizeof(granted), lock->granted_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); - if (lock->blkd_time.tv_sec == 0) { + if (lock->blkd_time == 0) { snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", @@ -3695,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode) lkowner_utoa(&lock->owner), lock->client, lock->connection_id, granted); } else { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" @@ -3712,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); @@ -3764,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode) SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - _gf_true); + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3778,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode) count); SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, 0, - &lock->blkd_time.tv_sec, _gf_false); + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3812,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, lock->blocked ? "BLOCKED" : "ACTIVE"); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->client_uid, &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - (lock->blocked) ? _gf_false : _gf_true); + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/metadisp/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * + * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. + */ + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char gfid_buf[GFID_STR_LEN + 1] = { + 0, + }; + char *path = NULL; + + GF_VALIDATE_OR_GOTO("metadisp", src_loc, out); + GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out); + + loc_copy(dst_loc, src_loc); + memcpy(dst_loc->pargfid, root, sizeof(root)); + GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke + // whatever loc_copy gave us + + uuid_utoa_r(gfid, gfid_buf); + + path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char), + gf_common_mt_char); // freed via loc_wipe + + path[0] = '/'; + strncpy(path + 1, gfid_buf, GFID_STR_LEN); + path[GFID_STR_LEN] = 0; + dst_loc->path = path; + if (src_loc->name) + dst_loc->name = strrchr(dst_loc->path, '/'); + if (dst_loc->name) + dst_loc->name++; + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c new file mode 100644 index 00000000000..4385b7dd5b7 --- /dev/null +++ b/xlators/features/metadisp/src/fops-tmpl.c @@ -0,0 +1,10 @@ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/xlator.h> +#include "metadisp.h" +#include "metadisp-fops.h" + +#pragma generate diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py new file mode 100644 index 00000000000..8b5e120fdec --- /dev/null +++ b/xlators/features/metadisp/src/gen-fops.py @@ -0,0 +1,160 @@ +#!/usr/bin/python + +import sys +from generator import fop_subs, generate + +FN_METADATA_CHILD_GENERIC = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ metadata"); + STACK_WIND (frame, default_@NAME@_cbk, + METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_GENERIC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ generic"); + STACK_WIND (frame, default_@NAME@_cbk, + DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATAFD_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ datafd"); + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATALOC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ dataloc"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} +""" + +FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@," + +skipped = [ + "readdir", + "readdirp", + "lookup", + "fsync", + "stat", + "open", + "create", + "unlink", + "setattr", + # TODO: implement "inodelk", +] + + +def gen_fops(): + done = skipped + + # + # these are fops that wind to the DATA_CHILD + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "writev", + "readv", + "ftruncate", + "zerofill", + "discard", + "seek", + "fstat", + ]: + done = done + [name] + print(generate(FN_DATAFD_TEMPLATE, name, fop_subs)) + + for name in ["truncate"]: + done = done + [name] + print(generate(FN_DATALOC_TEMPLATE, name, fop_subs)) + + # these are fops that operate solely on dentries, folders, + # or extended attributes. Therefore, they must always + # wind to METADATA_CHILD and should never perform + # any path rewriting + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "mkdir", + "symlink", + "link", + "rename", + "mknod", + "opendir", + # "readdir, # special-cased + # "readdirp, # special-cased + "fsyncdir", + # "setattr", # special-cased + "readlink", + "fentrylk", + "access", + # TODO: these wind to both, + # data for backend-attributes and metadata for the rest + "xattrop", + "setxattr", + "getxattr", + "removexattr", + "fgetxattr", + "fsetxattr", + "fremovexattr", + ]: + + done = done + [name] + print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs)) + + print("struct xlator_fops fops = {") + for name in done: + print(generate(FOPS_LINE_TEMPLATE, name, fop_subs)) + + print("};") + + +for l in open(sys.argv[1], "r").readlines(): + if l.find("#pragma generate") != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_fops() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c new file mode 100644 index 00000000000..f8c9798dd59 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-create.c @@ -0,0 +1,101 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Create, like stat, is a two-step process. We send a create + * to the METADATA_CHILD, then send another create to the DATA_CHILD. + * + * We do the metadata child first to ensure that the ACLs are enforced. + */ + +int32_t +metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + // create the backend data inode + STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +int32_t +metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = cookie; + if (op_ret != 0) { + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; + } + + if (stub == NULL) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + return 0; + } + + call_resume(stub); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("."); + + loc_t backend_loc = { + 0, + }; + call_stub_t *stub = NULL; + uuid_t *gfid_req = NULL; + + RESOLVE_GFID_REQ(xdata, gfid_req, out); + + if (build_backend_loc(*gfid_req, loc, &backend_loc)) { + goto unwind; + } + + frame->local = loc; + + stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags, + mode, umask, fd, xdata); + + STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h new file mode 100644 index 00000000000..56dd427cf34 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fops.h @@ -0,0 +1,51 @@ +#ifndef GF_METADISP_FOPS_H_ +#define GF_METADISP_FOPS_H_ + +#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> +#include <glusterfs/glusterfs.h> + +#include <sys/types.h> + +/* fops in here are defined in their own file. Every other fop is just defined + * inline of fops.c */ + +int +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); + +int +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +#endif diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c new file mode 100644 index 00000000000..2e46fa84eac --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fsync.c @@ -0,0 +1,54 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} + +int32_t +metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata); + STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c new file mode 100644 index 00000000000..27d90c9f746 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-lookup.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Lookup, like stat, is a two-step process for grabbing the metadata details + * as well as the data details. + */ + +int32_t +metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + METADISP_TRACE("backend_lookup_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +} + +int32_t +metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("backend_lookup_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->lookup, &backend_loc, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + + // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + METADISP_TRACE("lookup"); + call_stub_t *stub = NULL; + stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata); + STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c new file mode 100644 index 00000000000..64814afe636 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-open.c @@ -0,0 +1,70 @@ +#include <glusterfs/call-stub.h> +#include "metadisp.h" + +int32_t +metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("got open results %d %d", op_ret, op_errno); + + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (!stub) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this), + DATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd, + xdata); + STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c new file mode 100644 index 00000000000..5f840b1e88f --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-readdir.c @@ -0,0 +1,65 @@ +#include "metadisp.h" + +/** + * With a change to the posix xlator, readdir and readdirp are shockingly + * simple. + * + * The issue with separating the backend data of the files + * with the metadata is that readdirs must now read from multiple sources + * to coalesce the directory entries. + * + * The way we do this is to tell the METADATA_CHILD that when it's + * running readdirp, each file entry should have a stat wound to + * 'stat-source-of-truth'. + * + * see metadisp_stat for how it handles winds _from_posix. + */ + +int32_t +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + /* + * Always use readdirp, even if the original was readdir. Why? Because NFS. + * There are multiple translations between Gluster, UNIX, and NFS stat + * structures in that path. One of them uses the type etc. from the stat + * structure, which is only filled in by readdirp. If we use readdir, the + * entries do actually go all the way back to the client and are visible in + * getdents, but then the readdir throws them away because of the + * uninitialized type. + */ + GF_UNUSED int32_t ret; + if (!xdata) { + xdata = dict_new(); + } + + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +} + +int32_t +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + if (!xdata) { + xdata = dict_new(); + } + GF_UNUSED int32_t ret; + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c new file mode 100644 index 00000000000..6991cf644f3 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-setattr.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid, + xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(statpost->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + METADISP_TRACE("setattr"); + call_stub_t *stub = NULL; + stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf, + valid, xdata); + STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c new file mode 100644 index 00000000000..b06d0dbcddd --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-stat.c @@ -0,0 +1,124 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The stat flow in METADISP is complicated because we must + * do ensure a few things: + * 1. stat, on the path within the metadata layer, + * MUST get the backend FD of the data layer. + * --- we wind to the metadata layer, then the data layer. + * + * 2. the metadata layer MUST be able to ask the data + * layer for stat information. + * --- this is 'syncop-internal-from-posix' + * + * 3. when the metadata exists BUT the data is missing, + * we MUST mark the backend file as bad and heal it. + */ + +int32_t +metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno); + if (op_errno == ENOENT) { + STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL); + return 0; + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("winding stat to path %s", loc->path); + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("bad object, sending EUCLEAN"); + STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL); + return 0; + } + + STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + METADISP_TRACE("got stat results %d %d", op_ret, op_errno); + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // only use the stub for the files + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t ret = 0; + loc_t backend_loc = { + 0, + }; + METADISP_FILTER_ROOT(stat, loc, xdata); + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) { + // if we've just been sent a stat from posix, then we know + // that we must send down a stat for a file to the second child. + // + // that means we can skip the stat for the first child and just + // send to the data disk. + METADISP_TRACE("got syncop-internal-from-posix"); + STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->stat, &backend_loc, xdata); + return 0; + } + + // we do not know if the request is for a file, folder, etc. wind + // to first child to find out. + stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata); + METADISP_TRACE("winding stat to first child %s", loc->path); + STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->stat, loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c new file mode 100644 index 00000000000..1f6a8eb35ce --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-unlink.c @@ -0,0 +1,160 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The unlink flow in metadisp is complicated because we must + * do ensure that UNLINK causes both the metadata objects + * to get removed and the data objects to get removed. + */ + +int32_t +metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) +{ + METADISP_TRACE("winding backend unlink to path %s", loc->path); + STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE(". %d %d", op_ret, op_errno); + + int ret = 0; + call_stub_t *stub = NULL; + int nlink = 0; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret != 0) { + op_errno = EINVAL; + op_ret = -1; + goto unwind; + } + METADISP_TRACE("frontend hardlink count %d %d", ret, nlink); + if (nlink > 1) { + goto unwind; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + call_stub_t *stub = NULL; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // fail fast on empty gfid so we don't loop forever + if (gf_uuid_is_null(buf->ia_gfid)) { + op_ret = -1; + op_errno = ENODATA; + goto unwind; + } + + // fill gfid since the stub is incomplete + memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t)); + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("winding lookup for unlink to path %s", loc->path); + + // loop back to ourselves after a lookup + stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata); + STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub, + METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + // + // ensure we get the link count on the unlink response, so we can + // account for hardlinks before winding to the backend. + // NOTE: + // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation + // is needed to ensure that multiple requests will work in the same + // xlator stack. + // + if (!xdata) { + xdata = dict_new(); + } + dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + + METADISP_TRACE("winding frontend unlink to path %s", loc->path); + stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag, + xdata); + + STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c new file mode 100644 index 00000000000..3c8f150cebc --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.c @@ -0,0 +1,46 @@ +#include <glusterfs/call-stub.h> + +#include "metadisp.h" +#include "metadisp-fops.h" + +int32_t +init(xlator_t *this) +{ + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with children. exiting"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile "); + } + + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +/* defined in fops.c */ +struct xlator_fops fops; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .fops = &fops, + .cbks = &cbks, + .options = options, + .op_version = {1}, + .identifier = "metadisp", + .category = GF_EXPERIMENTAL, +}; diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h new file mode 100644 index 00000000000..c8fd7a13c04 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef GF_METADISP_H_ +#define GF_METADISP_H_ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#define METADATA_CHILD(_this) FIRST_CHILD(_this) +#define DATA_CHILD(_this) SECOND_CHILD(_this) + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc); + +#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args) + +#define METADISP_FILTER_ROOT(_op, _args...) \ + if (strcmp(loc->path, "/") == 0) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \ + if (__is_root_gfid(_gfid)) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \ + VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \ + _lbl) + +#endif /* __TEMPLATE_H__ */ diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 73c008a2c00..18df9ae6d19 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * loop of validation and checking * limit when timeout is zero. */ - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -644,27 +640,10 @@ unwind: return 0; } -static uint64_t -quota_time_elapsed(struct timeval *now, struct timeval *then) -{ - return (now->tv_sec - then->tv_sec); -} - -int32_t -quota_timeout(struct timeval *tv, int32_t timeout) +static inline gf_boolean_t +quota_timeout(time_t t, uint32_t timeout) { - struct timeval now = { - 0, - }; - int32_t timed_out = 0; - - gettimeofday(&now, NULL); - - if (quota_time_elapsed(&now, tv) >= timeout) { - timed_out = 1; - } - - return timed_out; + return (gf_time() - t) >= timeout; } /* Return: 1 if new entry added @@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if ((object_aggr_count) > ctx->object_hard_lim) { hard_limit_exceeded = 1; @@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if (wouldbe_size >= ctx->hard_lim) { hard_limit_exceeded = 1; @@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; } - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -4873,7 +4848,7 @@ off: void quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, - char **path, struct timeval *cur_time) + char **path, time_t *cur_time) { xlator_t *this = THIS; @@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, if (!(*path)) *path = uuid_utoa(inode->gfid); - gettimeofday(cur_time, NULL); + *cur_time = gf_time(); } /* Logs if @@ -4903,9 +4878,7 @@ void quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta) { - struct timeval cur_time = { - 0, - }; + time_t cur_time = 0; char *usage_str = NULL; char *path = NULL; int64_t cur_size = 0; @@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } /* Usage is above soft limit */ else if (cur_size > ctx->soft_lim && - quota_timeout(&ctx->prev_log, priv->log_timeout)) { + quota_timeout(ctx->prev_log_time, priv->log_timeout)) { quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, @@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } if (path) @@ -5184,9 +5157,9 @@ quota_priv_dump(xlator_t *this) if (ret) goto out; else { - gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout); - gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout); - gf_proc_dump_write("alert-time", "%d", priv->log_timeout); + gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout); + gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout); + gf_proc_dump_write("alert-time", "%u", priv->log_timeout); gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); gf_proc_dump_write("statfs", "%d", priv->consider_statfs); gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 8a3dc7a77f5..0395d78c9ef 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -153,8 +153,8 @@ struct quota_inode_ctx { int64_t object_soft_lim; struct iatt buf; struct list_head parents; - struct timeval tv; - struct timeval prev_log; + time_t validate_time; + time_t prev_log_time; gf_boolean_t ancestry_built; gf_lock_t lock; }; @@ -199,6 +199,7 @@ struct quota_local { typedef struct quota_local quota_local_t; struct quota_priv { + /* FIXME: consider time_t for timeouts. */ uint32_t soft_timeout; uint32_t hard_timeout; uint32_t log_timeout; diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c index 25fbd4aa748..df45f2a940b 100644 --- a/xlators/features/read-only/src/worm-helper.c +++ b/xlators/features/read-only/src/worm-helper.c @@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) GF_VALIDATE_OR_GOTO("worm", this, out); GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); - start_time = time(NULL); + start_time = gf_time(); dict = dict_new(); if (!dict) { gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); @@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, if (ret) goto out; stbuf->ia_mtime = stpre.ia_mtime; - stbuf->ia_atime = time(NULL) + retention_state->ret_period; + stbuf->ia_atime = gf_time() + retention_state->ret_period; if (fop_with_fd) ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, @@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, { int op_errno = EROFS; int ret = -1; + time_t now = 0; uint64_t com_period = 0; uint64_t start_time = 0; dict_t *dict = NULL; @@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, goto out; } - if (ret == -1 && (time(NULL) - start_time) >= com_period) { - if ((time(NULL) - stbuf.ia_mtime) >= com_period) { + now = gf_time(); + + if (ret == -1 && (now - start_time) >= com_period) { + if ((now - stbuf.ia_mtime) >= com_period) { ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state, &stbuf); if (ret) { @@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, op_errno = 0; goto out; } - } else if (ret == -1 && (time(NULL) - start_time) < com_period) { + } else if (ret == -1 && (now - start_time) < com_period) { op_errno = 0; goto out; - } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) { + } else if (reten_state.retain && ((now >= stbuf.ia_atime))) { gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf); } if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable && diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index ff928d0df61..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -1004,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1020,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exists + * and directly issue mknod to crete shards. + * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -2443,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2463,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2473,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -5656,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index a28aa1e09e0..7d09cba3e9c 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -216,7 +216,7 @@ append_time_stamp(char *name, size_t name_size) 0, }; - gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS); + gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS); /* removing white spaces in timestamp */ for (i = 0; i < strlen(timestr); i++) { diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 978825f6b56..c641bd6f432 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -316,7 +316,7 @@ upcall_reaper_thread(void *data) priv = this->private; GF_ASSERT(priv); - time_now = time(NULL); + time_now = gf_time(); while (!priv->fini) { list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, inode_ctx_list) @@ -344,7 +344,7 @@ upcall_reaper_thread(void *data) /* don't do a very busy loop */ timeout = get_cache_invalidation_timeout(this); sleep(timeout / 2); - time_now = time(NULL); + time_now = gf_time(); } return NULL; @@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, goto out; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, @@ -670,13 +670,13 @@ upcall_cache_forget(xlator_t *this, inode_t *inode, return; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list, client_list) { - /* Set the access time to time(NULL) + /* Set the access time to gf_time() * to send notify */ up_client_entry->access_time = time_now; diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index eaa61c435e5..685beb42d27 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -25,13 +25,14 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \ - glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \ + $(CONTRIBDIR)/mount/mntent.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/libglusterd/src/libglusterd.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ - $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) + $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD) noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ @@ -46,7 +47,8 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ glusterd-shd-svc.h glusterd-shd-svc-helper.h \ glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ - $(CONTRIBDIR)/userspace-rcu/rculist-extra.h + $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \ + $(CONTRIBDIR)/mount/mntent_compat.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 6d1a1e98848..e56cd0e6c74 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1359,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "Unable to find volume: %s", volname); goto out; } @@ -1378,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), &replica_count); if (ret) { - gf_msg_debug(THIS->name, 0, "Unable to get replica count"); - } - - ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), - &arbiter_count); - if (ret) { - gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict"); + gf_msg_debug(this->name, 0, "Unable to get replica count"); } if (replica_count > 0) { @@ -1400,18 +1394,18 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) glusterd_add_peers_to_auth_list(volname); - if (glusterd_is_volume_replicate(volinfo)) { + if (replica_count && glusterd_is_volume_replicate(volinfo)) { /* Do not allow add-brick for stopped volumes when replica-count * is being increased. */ - if (conf->op_version >= GD_OP_VERSION_3_7_10 && replica_count && - GLUSTERD_STATUS_STOPPED == volinfo->status) { + if (GLUSTERD_STATUS_STOPPED == volinfo->status && + conf->op_version >= GD_OP_VERSION_3_7_10) { ret = -1; snprintf(msg, sizeof(msg), " Volume must not be in" " stopped state when replica-count needs to " " be increased."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1419,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* op-version check for replica 2 to arbiter conversion. If we * don't have this check, an older peer added as arbiter brick * will not have the arbiter xlator in its volfile. */ - if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) && - (replica_count == 3)) { - ret = -1; - snprintf(msg, sizeof(msg), - "Cluster op-version must " - "be >= 30800 to add arbiter brick to a " - "replica 2 volume."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", - msg); - *op_errstr = gf_strdup(msg); - goto out; + if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) { + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &arbiter_count); + if (ret) { + gf_msg_debug(this->name, 0, + "No arbiter count present in the dict"); + } else if (arbiter_count == 1) { + ret = -1; + snprintf(msg, sizeof(msg), + "Cluster op-version must " + "be >= 30800 to add arbiter brick to a " + "replica 2 volume."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } } /* Do not allow increasing replica count for arbiter volumes. */ - if (replica_count && volinfo->arbiter_count) { + if (volinfo->arbiter_count) { ret = -1; snprintf(msg, sizeof(msg), "Increasing replica count " "for arbiter volumes is not supported."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1451,7 +1451,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) * doing this check at the originator node is sufficient. */ - if (is_origin_glusterd(dict) && !is_force) { + if (!is_force && is_origin_glusterd(dict)) { ret = 0; if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { gf_msg_debug(this->name, 0, @@ -1459,15 +1459,18 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) "found. Checking brick order."); if (replica_count) ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, replica_count); else ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, volinfo->replica_count); } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { gf_msg_debug(this->name, 0, "Disperse cluster type" " found. Checking brick order."); - ret = glusterd_check_brick_order(dict, msg, volinfo->type, + ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname, + &bricks, &count, volinfo->disperse_count); } if (ret) { @@ -1496,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) if (len < 0) { strcpy(msg, "<error>"); } - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1528,7 +1531,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) "Volume name %s rebalance is in " "progress. Please retry after completion", volname); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -1546,18 +1549,22 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) msg[0] = '\0'; } - ret = dict_get_int32n(dict, "count", SLEN("count"), &count); - if (ret) { - gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, - "Unable to get count"); - goto out; + if (!count) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } } - ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); - if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, - "Unable to get bricks"); - goto out; + if (!bricks) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks"); + goto out; + } } if (bricks) { @@ -1576,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) "brick path %s is " "too long", brick); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", msg); *op_errstr = gf_strdup(msg); @@ -1587,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true, NULL); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, "Add-brick: Unable" " to get brickinfo"); goto out; @@ -1657,7 +1664,7 @@ out: GF_FREE(str_ret); GF_FREE(all_bricks); - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -2227,6 +2234,42 @@ out: } int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict) { int ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c index 2d60daf180a..f08bd6cebee 100644 --- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c +++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c @@ -421,6 +421,35 @@ check_host_list(void) } int +gd_ganesha_send_dbus(char *volname, char *value) +{ + runner_t runner = { + 0, + }; + int ret = -1; + runinit(&runner); + + GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out); + GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out); + + ret = 0; + if (check_host_list()) { + /* Check whether ganesha is running on this node */ + if (manage_service("status")) { + gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0, + GD_MSG_GANESHA_NOT_RUNNING, + "Export failed, NFS-Ganesha is not running"); + } else { + runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, + value, volname, NULL); + ret = runner_run(&runner); + } + } +out: + return ret; +} + +int manage_export_config(char *volname, char *value, char **op_errstr) { runner_t runner = { @@ -447,9 +476,6 @@ int ganesha_manage_export(dict_t *dict, char *value, gf_boolean_t update_cache_invalidation, char **op_errstr) { - runner_t runner = { - 0, - }; int ret = -1; glusterd_volinfo_t *volinfo = NULL; dict_t *vol_opts = NULL; @@ -458,7 +484,6 @@ ganesha_manage_export(dict_t *dict, char *value, glusterd_conf_t *priv = NULL; gf_boolean_t option = _gf_false; - runinit(&runner); this = THIS; GF_ASSERT(this); priv = this->private; @@ -538,26 +563,13 @@ ganesha_manage_export(dict_t *dict, char *value, goto out; } } - - if (check_host_list()) { - /* Check whether ganesha is running on this node */ - if (manage_service("status")) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING, - "Export failed, NFS-Ganesha is not running"); - } else { - runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, - value, volname, NULL); - ret = runner_run(&runner); - if (ret) { - gf_asprintf(op_errstr, - "Dynamic export" - " addition/deletion failed." - " Please see log file for details"); - goto out; - } - } + ret = gd_ganesha_send_dbus(volname, value); + if (ret) { + gf_asprintf(op_errstr, + "Dynamic export addition/deletion failed." + " Please see log file for details"); + goto out; } - if (update_cache_invalidation) { vol_opts = volinfo->dict; ret = dict_set_dynstr_with_alloc(vol_opts, diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c index b01fd4da24b..a0bfea41f0f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c @@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags) } runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s", svc->proc.logdir, svc->proc.logfile); if ((len < 0) || (len >= PATH_MAX)) { @@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags) goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 7d488ffd87a..1b21c40596d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -5593,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str); if (ret) { - now = time(NULL); + now = gf_time(); strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S", localtime(&now)); gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp); diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c index d18eb6b2f5e..61c0f1c946f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-hooks.c +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -206,11 +206,13 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) int i = 0; int count = 0; int ret = -1; + int flag = 0; char query[1024] = { 0, }; char *key = NULL; char *value = NULL; + char *inet_family = NULL; xlator_t *this = NULL; this = THIS; GF_ASSERT(this); @@ -243,9 +245,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) continue; runner_argprintf(runner, "%s=%s", key, value); + if ((strncmp(key, "cluster.enable-shared-storage", + SLEN("cluster.enable-shared-storage")) == 0 || + strncmp(key, "enable-shared-storage", + SLEN("enable-shared-storage")) == 0) && + strncmp(value, "enable", SLEN("enable")) == 0) + flag = 1; } glusterd_hooks_add_custom_args(dict, runner); + if (flag == 1) { + ret = dict_get_str_sizen(this->options, "transport.address-family", + &inet_family); + if (!ret) { + runner_argprintf(runner, "transport.address-family=%s", + inet_family); + } + } ret = 0; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c index a800d9543cf..34abf35cb00 100644 --- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c @@ -76,7 +76,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req) "for volume %s", volname); - ret = dict_set_uint64(dict, "rotate-key", (uint64_t)time(NULL)); + ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time()); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 17052cee263..d7257e1a7b5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -27,6 +27,7 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_mop_stage_req_t, gf_gld_mt_probe_ctx_t, gf_gld_mt_glusterd_volinfo_t, + gf_gld_mt_volinfo_dict_data_t, gf_gld_mt_glusterd_brickinfo_t, gf_gld_mt_peer_hostname_t, gf_gld_mt_defrag_info, diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index c0891797fdf..3a1e600fb03 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -319,7 +319,8 @@ GLFS_MSGID( GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED, GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED, GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL, - GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL); + GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL, + GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL); #define GD_MSG_INVALID_ENTRY_STR "Invalid data entry" #define GD_MSG_INVALID_ARGUMENT_STR \ @@ -447,4 +448,4 @@ GLFS_MSGID( "Failed to allocate memory or get serialized length of dict" #define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute" -#endif /* !_GLUSTERD_MESSAGES_H_ */
\ No newline at end of file +#endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c index c170827eec0..1069688a89d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -626,6 +626,136 @@ out: } static int +glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + uint32_t op_errno, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + rsp.op_errno = op_errno; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_commit_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode post commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "post commit failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send post commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict) @@ -963,6 +1093,12 @@ glusterd_handle_commit(rpcsvc_request_t *req) } static int +glusterd_handle_post_commit(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn); +} + +static int glusterd_handle_post_validate(rpcsvc_request_t *req) { return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn); @@ -986,6 +1122,9 @@ static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0}, [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL, GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT", + glusterd_handle_post_commit, NULL, + GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0}, [GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL", glusterd_handle_post_validate, NULL, GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA, diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index b2128e10a04..bca7221062b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -86,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno, peer_str, err_string); break; } + case GLUSTERD_MGMT_V3_POST_COMMIT: { + snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s", + peer_str, err_string); + break; + } case GLUSTERD_MGMT_V3_POST_VALIDATE: { snprintf(op_err, sizeof(op_err), "Post Validation failed on %s. %s", peer_str, @@ -405,6 +410,47 @@ out: } int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + + switch (op) { + case GD_OP_ADD_BRICK: + ret = glusterd_post_commit_add_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Add-brick post commit failed."); + goto out; + } + break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_post_commit_replace_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Replace-brick post commit failed."); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -1720,6 +1766,274 @@ out: } int32_t +gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + free(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref(rsp_dict); + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid); + GF_FREE(peerid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. + */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_post_commit_cbk_fn); +} + +int +gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request( + peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(op_ctx); + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Post commit on local node */ + ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Post commit failed " + "on localhost. Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending post commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + if (!peerinfo->connected) + continue; + + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent post commit req for %s to %d " + "peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + glusterd_op_modify_op_ctx(op, op_ctx); + return ret; +} + +int32_t gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -2408,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op, goto out; } + /* POST COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr, + &op_errno, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit Op Failed"); + goto out; + } + /* POST-COMMIT VALIDATE PHASE */ /* As of now, post_validate is not trying to cleanup any failed commands. So as of now, I am sending 0 (op_ret as 0). diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h index 71f793d0397..27dd1849519 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -28,6 +28,10 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, uint32_t *op_errno, dict_t *rsp_dict); int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict); + +int32_t gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict); @@ -84,4 +88,10 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict); + +int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr); + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr); #endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 2afd0fe1b74..458bf168ede 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -219,6 +219,9 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, char valgrind_logfile[PATH_MAX] = { 0, }; + char msg[1024] = { + 0, + }; char *volfileserver = NULL; char *localtime_logging = NULL; @@ -270,12 +273,17 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, "rebalance"); runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", priv->logdir, volinfo->volname); - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } @@ -316,6 +324,10 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, runner_add_arg(&runner, "--localtime-logging"); } + snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run_nowait(&runner); if (ret) { gf_msg_debug("glusterd", 0, "rebalance command failed"); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c index 1f3f4909cbb..d75f249b29e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -304,7 +304,7 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags) } runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log", svc->proc.logdir); if ((len < 0) || (len >= PATH_MAX)) { @@ -313,8 +313,13 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags) goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index d96d5dd2cfc..995268b796d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -2037,8 +2037,9 @@ glusterd_update_snaps_synctask(void *opaque) "Failed to remove snap %s", snap->snapname); goto out; } - if (dict) - dict_unref(dict); + + dict_unref(dict); + dict = NULL; } snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); ret = dict_get_int32(peer_data, buf, &val); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index c2428dc0de0..aeaa8d15214 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -3930,7 +3930,8 @@ glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op, goto out; } - ret = dict_set_int64(dict, "snap-time", (int64_t)time(&snap_time)); + snap_time = gf_time(); + ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Unable to set snap-time"); @@ -5322,6 +5323,48 @@ glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier")); gd_update_volume_op_versions(snap_vol); + /* * + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + goto out; + } + } + + ret = dict_set_dynstr_with_alloc(snap_vol->dict, + "features.cache-invalidation", "on"); + ret = gd_ganesha_send_dbus(clonename, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + goto out; + } + } + if (!glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + /* This happens when a snapshot was created when Ganesha was + * enabled globally. Then Ganesha disabled from the cluster. + * In such cases, we will have the volume level option set + * on dict, So we have to disable it as it doesn't make sense + * to keep the option. + */ + + ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off"); + if (ret) + goto out; + } + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, @@ -5393,8 +5436,31 @@ out: for (i = 0; unsupported_opt[i].key; i++) GF_FREE(unsupported_opt[i].value); - if (snap_vol) + if (snap_vol) { + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + } + } + + ret = gd_ganesha_send_dbus(clonename, "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + } + } + glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true); + } snap_vol = NULL; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 465e41ef00b..d94dceb10b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str) while (ptr) { *ptr = '-'; - ptr = strchr(str, '/'); + ptr = strchr(ptr, '/'); } } @@ -660,85 +660,72 @@ out: } static int -_storeslaves(dict_t *this, char *key, data_t *value, void *data) -{ - int32_t ret = 0; - gf_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; - - xl = THIS; - GF_ASSERT(xl); - - shandle = (gf_store_handle_t *)data; - - GF_ASSERT(shandle); - GF_ASSERT(shandle->fd > 0); - GF_ASSERT(shandle->path); - GF_ASSERT(key); - GF_ASSERT(value); - GF_ASSERT(value->data); - - gf_msg_debug(xl->name, 0, "Storing in volinfo:key= %s, val=%s", key, - value->data); - - ret = gf_store_save_value(shandle->fd, key, (char *)value->data); - if (ret) { - gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store" - " handle for path: %s", - shandle->path); - return -1; - } - return 0; -} - -int -_storeopts(dict_t *this, char *key, data_t *value, void *data) +_storeopts(dict_t *dict_value, char *key, data_t *value, void *data) { int32_t ret = 0; int32_t exists = 0; + int32_t option_len = 0; gf_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; + glusterd_volinfo_data_store_t *dict_data = NULL; + xlator_t *this = NULL; - xl = THIS; - GF_ASSERT(xl); + this = THIS; + GF_ASSERT(this); - shandle = (gf_store_handle_t *)data; + dict_data = (glusterd_volinfo_data_store_t *)data; + shandle = dict_data->shandle; GF_ASSERT(shandle); GF_ASSERT(shandle->fd > 0); - GF_ASSERT(shandle->path); GF_ASSERT(key); GF_ASSERT(value); GF_ASSERT(value->data); - if (is_key_glusterd_hooks_friendly(key)) { - exists = 1; + if (dict_data->key_check == 1) { + if (is_key_glusterd_hooks_friendly(key)) { + exists = 1; - } else { - exists = glusterd_check_option_exists(key, NULL); + } else { + exists = glusterd_check_option_exists(key, NULL); + } } - - if (1 == exists) { - gf_msg_debug(xl->name, 0, - "Storing in volinfo:key= %s, " + if (exists == 1 || dict_data->key_check == 0) { + gf_msg_debug(this->name, 0, + "Storing in buffer for volinfo:key= %s, " "val=%s", key, value->data); - } else { - gf_msg_debug(xl->name, 0, "Discarding:key= %s, val=%s", key, + gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key, value->data); return 0; } - ret = gf_store_save_value(shandle->fd, key, (char *)value->data); - if (ret) { - gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store" - " handle for path: %s", - shandle->path); + /* + * The option_len considers the length of the key value + * pair and along with that '=' and '\n', but as value->len + * already considers a NULL at the end of the data, adding + * just 1. + */ + option_len = strlen(key) + value->len + 1; + + if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) { + ret = gf_store_save_items(shandle->fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + return -1; + } + dict_data->buffer_len = 0; + dict_data->buffer[0] = '\0'; + } + ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1, + "%s=%s\n", key, value->data); + if (ret < 0 || ret > option_len + 1) { + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL); return -1; } + + dict_data->buffer_len += ret; + return 0; } @@ -1013,7 +1000,7 @@ glusterd_store_create_snap_dir(glusterd_snap_t *snap) return ret; } -int32_t +static int32_t glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo) { int32_t ret = -1; @@ -1021,19 +1008,47 @@ glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo) GF_ASSERT(fd > 0); GF_ASSERT(volinfo); GF_ASSERT(volinfo->shandle); + xlator_t *this = NULL; + glusterd_volinfo_data_store_t *dict_data = NULL; + + this = THIS; + GF_ASSERT(this); shandle = volinfo->shandle; + + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + ret = glusterd_volume_exclude_options_write(fd, volinfo); - if (ret) + if (ret) { goto out; + } + + dict_data->shandle = shandle; + dict_data->key_check = 1; shandle->fd = fd; - dict_foreach(volinfo->dict, _storeopts, shandle); + dict_foreach(volinfo->dict, _storeopts, (void *)dict_data); + + dict_data->key_check = 0; + dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data); + + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; + } + } - dict_foreach(volinfo->gsync_slaves, _storeslaves, shandle); shandle->fd = 0; out: - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -1274,14 +1289,6 @@ out: return ret; } -static int -_gd_store_rebalance_dict(dict_t *dict, char *key, data_t *value, void *data) -{ - int fd = *(int *)data; - - return gf_store_save_value(fd, key, value->data); -} - int32_t glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) { @@ -1289,6 +1296,12 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) char buf[PATH_MAX]; char uuid[UUID_SIZE + 1]; uint total_len = 0; + glusterd_volinfo_data_store_t *dict_data = NULL; + gf_store_handle_t shandle; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); GF_ASSERT(fd > 0); GF_ASSERT(volinfo); @@ -1328,14 +1341,33 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) } ret = gf_store_save_items(fd, buf); - if (ret) + if (ret) { goto out; + } if (volinfo->rebal.dict) { - dict_foreach(volinfo->rebal.dict, _gd_store_rebalance_dict, &fd); + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = &shandle; + shandle.fd = fd; + dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + NULL); + goto out; + ; + } + } } out: - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -2309,7 +2341,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -2642,7 +2674,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) brick_count++; } - if (gf_store_iter_destroy(tmpiter)) { + if (gf_store_iter_destroy(&tmpiter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -2823,13 +2855,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(tmpiter)) { + if (gf_store_iter_destroy(&tmpiter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; } - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -2962,7 +2994,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -3238,7 +3270,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -3343,20 +3375,6 @@ glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len) snprintf(path, len, "%s/options", conf->workdir); } -int -_store_global_opts(dict_t *this, char *key, data_t *value, void *data) -{ - gf_store_handle_t *shandle = data; - - if (gf_store_save_value(shandle->fd, key, (char *)value->data)) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store handle for key : %s, value %s", key, - (char *)value->data); - } - - return 0; -} - int32_t glusterd_store_options(xlator_t *this, dict_t *opts) { @@ -3365,13 +3383,15 @@ glusterd_store_options(xlator_t *this, dict_t *opts) char path[PATH_MAX] = {0}; int fd = -1; int32_t ret = -1; + glusterd_volinfo_data_store_t *dict_data = NULL; conf = this->private; glusterd_store_set_options_path(conf, path, sizeof(path)); ret = gf_store_handle_new(path, &shandle); - if (ret) + if (ret) { goto out; + } fd = gf_store_mkstemp(shandle); if (fd <= 0) { @@ -3379,15 +3399,30 @@ glusterd_store_options(xlator_t *this, dict_t *opts) goto out; } + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = shandle; shandle->fd = fd; - dict_foreach(opts, _store_global_opts, shandle); - shandle->fd = 0; + dict_foreach(opts, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; + } + } + ret = gf_store_rename_tmppath(shandle); - if (ret) - goto out; out: - if ((ret < 0) && (fd > 0)) + shandle->fd = 0; + GF_FREE(dict_data); + if ((ret < 0) && (fd > 0)) { gf_store_unlink_tmppath(shandle); + } gf_store_handle_destroy(shandle); return ret; } @@ -3433,7 +3468,7 @@ glusterd_store_retrieve_options(xlator_t *this) goto out; ret = 0; out: - (void)gf_store_iter_destroy(iter); + (void)gf_store_iter_destroy(&iter); gf_store_handle_destroy(shandle); return ret; } @@ -3883,7 +3918,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -4625,7 +4660,7 @@ glusterd_store_retrieve_peers(xlator_t *this) is_ok = _gf_true; next: - (void)gf_store_iter_destroy(iter); + (void)gf_store_iter_destroy(&iter); if (!is_ok) { gf_log(this->name, GF_LOG_WARNING, diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 04070549678..83f4df0783e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -29,7 +29,7 @@ typedef enum glusterd_store_ver_ac_ { } glusterd_volinfo_ver_ac_t; #define UUID_SIZE 36 - +#define VOLINFO_BUFFER_SIZE 4093 #define GLUSTERD_STORE_UUID_KEY "UUID" #define GLUSTERD_STORE_KEY_VOL_TYPE "type" @@ -112,6 +112,19 @@ typedef enum glusterd_store_ver_ac_ { #define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha" +/* + * The structure is responsible for handling the parameter for writes into + * the buffer before it is finally written to the file. The writes will be + * of the form of key-value pairs. + */ +struct glusterd_volinfo_data_store_ { + gf_store_handle_t *shandle; /*Contains fd and path of the file */ + int16_t buffer_len; + char key_check; /* flag to check if key is to be validated before write*/ + char buffer[VOLINFO_BUFFER_SIZE]; +}; +typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t; + int32_t glusterd_store_volinfo(glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c index 99119d69e45..18b3fb13630 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c @@ -162,6 +162,9 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) char *localtime_logging = NULL; char *log_level = NULL; char daemon_log_level[30] = {0}; + char msg[1024] = { + 0, + }; int32_t len = 0; this = THIS; @@ -187,7 +190,7 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", svc->proc.logdir, svc->name); if ((len < 0) || (len >= PATH_MAX)) { @@ -195,9 +198,13 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) goto unlock; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", - NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } @@ -226,8 +233,8 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) if (cmdline) dict_foreach(cmdline, svc_add_args, (void *)&runner); - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, - "Starting %s service", svc->name); + snprintf(msg, sizeof(msg), "Starting %s service", svc->name); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); if (flags == PROC_START_NO_WAIT) { ret = runner_run_nowait(&runner); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7d38b0a42d7..90ef2cf4c9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -79,6 +79,14 @@ #include <sys/sockio.h> #endif +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <libprocstat.h> +#include <libutil.h> +#endif + #define NFS_PROGRAM 100003 #define NFSV3_VERSION 3 @@ -1117,7 +1125,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) } brick_dir = &brickpath[strlen(mnt_pt)]; - brick_dir++; + if (brick_dir[0] == '/') + brick_dir++; snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); } @@ -2068,8 +2077,8 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, retry: runinit(&runner); - if (this->ctx->cmd_args.valgrind) { - /* Run bricks with valgrind */ + if (this->ctx->cmd_args.vgtool != _gf_none) { + /* Run bricks with valgrind. */ if (volinfo->logdir) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log", volinfo->logdir, volinfo->volname, exp_path); @@ -2083,8 +2092,13 @@ retry: goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } @@ -2197,7 +2211,7 @@ retry: if (is_brick_mx_enabled()) runner_add_arg(&runner, "--brick-mux"); - runner_log(&runner, "", 0, "Starting GlusterFS"); + runner_log(&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); brickinfo->port = port; brickinfo->rdma_port = rdma_port; @@ -2206,7 +2220,10 @@ retry: if (wait) { synclock_unlock(&priv->big_lock); + errno = 0; ret = runner_run(&runner); + if (errno != 0) + ret = errno; synclock_lock(&priv->big_lock); if (ret == EADDRINUSE) { @@ -2788,6 +2805,15 @@ glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path, ret = -1; goto out; } + } else if (priv->op_version < GD_OP_VERSION_7_0) { + ret = get_checksum_for_path(filepath, &cksum, priv->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL, + "unable to get " + "checksum for path: %s", + filepath); + goto out; + } } ret = get_checksum_for_file(fd, &cksum, priv->op_version); @@ -6083,7 +6109,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, GF_ATOMIC_INC(conf->blockers); ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); - return ret; free_iobref: iobref_unref(iobref); @@ -6092,7 +6117,7 @@ maybe_free_iobuf: iobuf_unref(iobuf); } err: - return -1; + return ret; } extern size_t @@ -6420,7 +6445,6 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo, int glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) { - char fname[128] = ""; char buf[1024] = ""; char cmdline[2048] = ""; xlator_t *this = NULL; @@ -6435,6 +6459,22 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) this = THIS; GF_ASSERT(this); +#ifdef __FreeBSD__ + blen = sizeof(buf); + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = pid; + + if (sys_sysctl(mib, 4, buf, &blen, NULL, blen) != 0) { + gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running", + pid); + return ret; + } +#else + char fname[128] = ""; snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid); if (sys_access(fname, R_OK) != 0) { @@ -6451,6 +6491,7 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) strerror(errno), fname); return ret; } +#endif /* convert cmdline to single string */ for (i = 0, j = 0; i < blen; i++) { @@ -6499,6 +6540,43 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) char * search_brick_path_from_proc(pid_t brick_pid, char *brickpath) { + char *brick_path = NULL; +#ifdef __FreeBSD__ + struct filestat *fst; + struct procstat *ps; + struct kinfo_proc *kp; + struct filestat_list *head; + + ps = procstat_open_sysctl(); + if (ps == NULL) + goto out; + + kp = kinfo_getproc(brick_pid); + if (kp == NULL) + goto out; + + head = procstat_getfiles(ps, (void *)kp, 0); + if (head == NULL) + goto out; + + STAILQ_FOREACH(fst, head, next) + { + if (fst->fs_fd < 0) + continue; + + if (!strcmp(fst->fs_path, brickpath)) { + brick_path = gf_strdup(fst->fs_path); + break; + } + } + +out: + if (head != NULL) + procstat_freefiles(ps, head); + if (kp != NULL) + free(kp); + procstat_close(ps); +#else struct dirent *dp = NULL; DIR *dirp = NULL; size_t len = 0; @@ -6509,7 +6587,6 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath) 0, }, }; - char *brick_path = NULL; if (!brickpath) goto out; @@ -6547,6 +6624,7 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath) out: if (dirp) sys_closedir(dirp); +#endif return brick_path; } @@ -8417,7 +8495,8 @@ glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state, transitions[next].old_state = old_state; transitions[next].new_state = new_state; transitions[next].event = event; - time(&transitions[next].time); + transitions[next].time = gf_time(); + log->current = next; if (log->count < log->size) log->count++; @@ -14632,7 +14711,8 @@ glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) */ int32_t glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, - int32_t sub_count) + char **volname, char **brick_list, + int32_t *brick_count, int32_t sub_count) { int ret = -1; int i = 0; @@ -14643,12 +14723,9 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, addrinfo_list_t *ai_list_tmp1 = NULL; addrinfo_list_t *ai_list_tmp2 = NULL; char *brick = NULL; - char *brick_list = NULL; char *brick_list_dup = NULL; char *brick_list_ptr = NULL; char *tmpptr = NULL; - char *volname = NULL; - int32_t brick_count = 0; struct addrinfo *ai_info = NULL; char brick_addr[128] = { 0, @@ -14676,32 +14753,38 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, ai_list->info = NULL; CDS_INIT_LIST_HEAD(&ai_list->list); - ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volume name"); - goto out; + if (!(*volname)) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &(*volname)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } } - ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve bricks list"); - goto out; + if (!(*brick_list)) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &(*brick_list)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve bricks list"); + goto out; + } } - ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve brick count"); - goto out; + if (!(*brick_count)) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &(*brick_count)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve brick count"); + goto out; + } } - brick_list_dup = brick_list_ptr = gf_strdup(brick_list); + brick_list_dup = brick_list_ptr = gf_strdup(*brick_list); /* Resolve hostnames and get addrinfo */ - while (i < brick_count) { + while (i < *brick_count) { ++i; brick = strtok_r(brick_list_dup, " \n", &tmpptr); brick_list_dup = tmpptr; @@ -14737,8 +14820,12 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, i = 0; ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + if (*brick_count < sub_count) { + sub_count = *brick_count; + } + /* Check for bad brick order */ - while (i < brick_count) { + while (i < *brick_count) { ++i; ai_info = ai_list_tmp1->info; ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, @@ -14901,3 +14988,59 @@ out: GF_FREE(new_auth_allow_list); return; } + +int +glusterd_replace_old_auth_allow_list(char *volname) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *old_auth_allow_list = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volname, out); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = dict_get_str_sizen(volinfo->dict, "old.auth.allow", + &old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "old auth allow list is not set, no need to replace the list"); + ret = 0; + goto out; + } + + dict_del_sizen(volinfo->dict, "auth.allow"); + ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"), + old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to replace auth.allow list"); + goto out; + } + + dict_del_sizen(volinfo->dict, "old.auth.allow"); + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL, + "failed to store volinfo"); + goto out; + } +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 05346916968..bf6ac295e26 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -859,6 +859,7 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count); int32_t glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + char **volname, char **bricks, int32_t *brick_count, int32_t sub_count); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 087be916c23..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3361,11 +3361,20 @@ volgen_link_bricks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, j); j++; } + if (!xl) { ret = -1; goto out; } + if (strncmp(xl_type, "performance/readdir-ahead", + SLEN("performance/readdir-ahead")) == 0) { + ret = xlator_set_fixed_option(xl, "performance.readdir-ahead", + "on"); + if (ret) + goto out; + } + ret = volgen_xlator_link(xl, trav); if (ret) goto out; @@ -3593,13 +3602,13 @@ volgen_graph_build_readdir_ahead(volgen_graph_t *graph, int32_t clusters = 0; if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD || - !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR) || - !glusterd_volinfo_get_boolean(volinfo, VKEY_READDIR_AHEAD)) + !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR)) goto out; clusters = volgen_link_bricks_from_list_tail( graph, volinfo, "performance/readdir-ahead", "%s-readdir-ahead-%d", child_count, 1); + out: return clusters; } @@ -3801,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3842,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index cafdffb63c4..814ab14fb27 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1002,7 +1002,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, gf_msg_debug(this->name, 0, "Replicate cluster type " "found. Checking brick order."); - ret = glusterd_check_brick_order(dict, msg, type, + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, replica_count); } else if (type == GF_CLUSTER_TYPE_DISPERSE) { ret = dict_get_int32n(dict, "disperse-count", @@ -1016,7 +1017,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, gf_msg_debug(this->name, 0, "Disperse cluster type" " found. Checking brick order."); - ret = glusterd_check_brick_order(dict, msg, type, + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, disperse_count); } if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 3ac8e2a29d7..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1813,7 +1813,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { {.key = "performance.readdir-ahead", .voltype = "performance/readdir-ahead", .option = "!perf", - .value = "on", + .value = "off", .op_version = 3, .description = "enable/disable readdir-ahead translator in the volume.", .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, @@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, }, + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 91c5f9ec5e3..7a86c2997b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1423,7 +1423,7 @@ init(xlator_t *this) char *mountbroker_root = NULL; int i = 0; int total_transport = 0; - gf_boolean_t valgrind = _gf_false; + gf_valgrind_tool vgtool; char *valgrind_str = NULL; char *transport_type = NULL; char var_run_dir[PATH_MAX] = { @@ -1436,6 +1436,14 @@ init(xlator_t *this) int32_t len = 0; int op_version = 0; +#if defined(RUN_WITH_MEMCHECK) + vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + vgtool = _gf_drd; +#else + vgtool = _gf_none; +#endif + #ifndef GF_DARWIN_HOST_OS { struct rlimit lim; @@ -1925,18 +1933,24 @@ init(xlator_t *this) } /* Set option to run bricks on valgrind if enabled in glusterd.vol */ - this->ctx->cmd_args.valgrind = valgrind; + this->ctx->cmd_args.vgtool = vgtool; ret = dict_get_str(this->options, "run-with-valgrind", &valgrind_str); if (ret < 0) { gf_msg_debug(this->name, 0, "cannot get run-with-valgrind value"); } if (valgrind_str) { - if (gf_string2boolean(valgrind_str, &valgrind)) { + gf_boolean_t vg = _gf_false; + + if (!strcmp(valgrind_str, "memcheck")) + this->ctx->cmd_args.vgtool = _gf_memcheck; + else if (!strcmp(valgrind_str, "drd")) + this->ctx->cmd_args.vgtool = _gf_drd; + else if (!gf_string2boolean(valgrind_str, &vg)) + this->ctx->cmd_args.vgtool = (vg ? _gf_memcheck : _gf_none); + else gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, - "run-with-valgrind value not a boolean string"); - } else { - this->ctx->cmd_args.valgrind = valgrind; - } + "run-with-valgrind is neither boolean" + " nor one of 'memcheck' or 'drd'"); } /* Store ping-timeout in conf */ diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 2c8fab8f0e7..cc4f98ecf47 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -1197,6 +1197,8 @@ glusterd_op_set_ganesha(dict_t *dict, char **errstr); int ganesha_manage_export(dict_t *dict, char *value, gf_boolean_t update_cache_invalidation, char **op_errstr); +int +gd_ganesha_send_dbus(char *volname, char *value); gf_boolean_t glusterd_is_ganesha_cluster(); gf_boolean_t @@ -1367,4 +1369,7 @@ glusterd_recreate_volfiles(glusterd_conf_t *conf); void glusterd_add_peers_to_auth_list(char *volname); +int +glusterd_replace_old_auth_allow_list(char *volname); + #endif diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index f2eeac1d1ee..0e22fe411ee 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -179,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts) 0, }; - clock_gettime(CLOCK_REALTIME, &ts); + timespec_now_realtime(&ts); fts->sec = ts.tv_sec; fts->nsec = ts.tv_nsec; @@ -5899,7 +5899,9 @@ fuse_graph_sync(xlator_t *this) new_graph_id = priv->next_graph->id; priv->next_graph = NULL; need_first_lookup = 1; - priv->handle_graph_switch = _gf_true; + if (old_subvol) { + priv->handle_graph_switch = _gf_true; + } while (!priv->event_recvd) { ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex); @@ -5935,13 +5937,6 @@ unlock: if (winds_on_old_subvol == 0) { xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL); } - } else { - pthread_mutex_lock(&priv->sync_mutex); - { - priv->handle_graph_switch = _gf_false; - pthread_cond_broadcast(&priv->migrate_cond); - } - pthread_mutex_unlock(&priv->sync_mutex); } return 0; diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c index fd11f2ba652..a2b0ad11fe4 100644 --- a/xlators/mount/fuse/src/fuse-helpers.c +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh) return state; } -#define FUSE_MAX_AUX_GROUPS \ - 32 /* We can get only up to 32 aux groups from /proc */ void frame_fill_groups(call_frame_t *frame) { @@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame) char filename[32]; char line[4096]; char *ptr = NULL; - FILE *fp = NULL; - int idx = 0; long int id = 0; char *saveptr = NULL; char *endptr = NULL; @@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame) call_stack_set_groups(frame->root, ngroups, &mygroups); } else { + FILE *fp = NULL; + ret = snprintf(filename, sizeof filename, "/proc/%d/status", frame->root->pid); - if (ret >= sizeof filename) + if (ret >= sizeof filename) { + gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); goto out; + } fp = fopen(filename, "r"); - if (!fp) + if (!fp) { + gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, + strerror(errno)); goto out; + } - if (call_stack_alloc_groups(frame->root, ngroups) != 0) - goto out; + for (;;) { + gf_boolean_t found_groups = _gf_false; + int idx = 0; - while ((ptr = fgets(line, sizeof line, fp))) { - if (strncmp(ptr, "Groups:", 7) != 0) - continue; + if (call_stack_alloc_groups(frame->root, ngroups) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to allocate gid buffer"); + goto out; + } + while ((ptr = fgets(line, sizeof line, fp))) { + if (strncmp(ptr, "Groups:", 7) == 0) { + found_groups = _gf_true; + break; + } + } + if (!found_groups) { + gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", + filename); + break; + } ptr = line + 8; for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; ptr = strtok_r(NULL, " \t\r\n", &saveptr)) { errno = 0; id = strtol(ptr, &endptr, 0); - if (errno == ERANGE) - break; - if (!endptr || *endptr) + if (errno == ERANGE || !endptr || *endptr) { + gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", + filename); break; - frame->root->groups[idx++] = id; - if (idx == FUSE_MAX_AUX_GROUPS) + } + if (idx < call_stack_groups_capacity(frame->root)) + frame->root->groups[idx] = id; + idx++; + if (idx == GF_MAX_AUX_GROUPS) break; } - - frame->root->ngrps = idx; - break; + if (idx > call_stack_groups_capacity(frame->root)) { + ngroups = idx; + rewind(fp); + } else { + frame->root->ngrps = idx; + break; + } } + out: + if (fp) + fclose(fp); } - -out: - if (fp) - fclose(fp); #elif defined(GF_SOLARIS_HOST_OS) char filename[32]; char scratch[128]; @@ -245,7 +268,7 @@ out: fp = fopen(filename, "r"); if (fp != NULL) { if (fgets(scratch, sizeof scratch, fp) != NULL) { - ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS); + ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); if (call_stack_alloc_groups(frame->root, ngrps) != 0) { fclose(fp); return; diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in index d43fc97d084..3a5feb606d7 100755 --- a/xlators/mount/fuse/utils/mount_glusterfs.in +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -469,6 +469,7 @@ parse_options() main () { +#if !defined(__FreeBSD__) ## `mount` on OSX specifies options as first argument echo $1|grep -q -- "-o" if [ $? -eq 0 ]; then @@ -478,7 +479,7 @@ main () volfile_loc=$1 mount_point=$2 fi - +#endif /* __FreeBSD__ */ while getopts "Vo:h" opt; do case "${opt}" in o) @@ -499,6 +500,12 @@ main () esac done +#ifdef __FreeBSD__ + shift $((OPTIND - 1)) + volfile_loc="$1" + mount_point="$2" +#endif /* __FreeBSD__ */ + [ -r "$volfile_loc" ] || { # '%' included to support ipv6 link local addresses server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p'); diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 3745188c7a5..7e3bbf16086 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -753,8 +753,7 @@ acl3svc_init(xlator_t *nfsx) goto err; } - ret = dict_set_dynstr(options, "transport.socket.listen-port", - GF_ACL3_PORT); + ret = dict_set_str(options, "transport.socket.listen-port", GF_ACL3_PORT); if (ret == -1) goto err; ret = dict_set_str(options, "transport-type", "socket"); diff --git a/xlators/nfs/server/src/auth-cache.c b/xlators/nfs/server/src/auth-cache.c index 64768646074..ffbf5b6cad6 100644 --- a/xlators/nfs/server/src/auth-cache.c +++ b/xlators/nfs/server/src/auth-cache.c @@ -189,7 +189,7 @@ out: static int _auth_cache_expired(struct auth_cache *cache, struct auth_cache_entry *entry) { - return ((time(NULL) - entry->timestamp) > cache->ttl_sec); + return ((gf_time() - entry->timestamp) > cache->ttl_sec); } /** @@ -474,7 +474,7 @@ cache_nfs_fh(struct auth_cache *cache, struct nfs3_fh *fh, goto out; } - entry->timestamp = time(NULL); + entry->timestamp = gf_time(); /* Update entry->item if it is pointing to a different export_item */ if (entry->item && entry->item != export_item) { GF_REF_PUT(entry->item); diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c index 0688779eb65..1a2b0f85453 100644 --- a/xlators/nfs/server/src/mount3udp_svc.c +++ b/xlators/nfs/server/src/mount3udp_svc.c @@ -216,7 +216,7 @@ mount3udp_thread(void *argv) GF_ASSERT(nfsx); - glusterfs_this_set(nfsx); + THIS = nfsx; transp = svcudp_create(RPC_ANYSOCK); if (transp == NULL) { diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index 8a58977b53c..897fb42b071 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -1072,7 +1072,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode) if (sattr->atime.set_it == SET_TO_SERVER_TIME) { valid |= GF_SET_ATTR_ATIME; if (buf) - buf->ia_atime = time(NULL); + buf->ia_atime = gf_time(); } if (sattr->mtime.set_it == SET_TO_CLIENT_TIME) { @@ -1084,7 +1084,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode) if (sattr->mtime.set_it == SET_TO_SERVER_TIME) { valid |= GF_SET_ATTR_MTIME; if (buf) - buf->ia_mtime = time(NULL); + buf->ia_mtime = gf_time(); } return valid; diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c index 7cfd75f9ed1..f9042bc3b3f 100644 --- a/xlators/nfs/server/src/nfs3.c +++ b/xlators/nfs/server/src/nfs3.c @@ -5651,7 +5651,7 @@ nfs3_init_state(xlator_t *nfsx) goto free_localpool; } - nfs3->serverstart = (uint64_t)time(NULL); + nfs3->serverstart = (uint64_t)gf_time(); INIT_LIST_HEAD(&nfs3->fdlru); LOCK_INIT(&nfs3->fdlrulock); nfs3->fdcount = 0; diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index c909e3bc093..577e8543966 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -1011,7 +1011,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame) int port = -1; struct nlm4_notify_args *ncf = NULL; - glusterfs_this_set(cs->nfsx); + GF_ASSERT(cs->nfsx); + THIS = cs->nfsx; rpc_transport_get_peeraddr(cs->trans, NULL, 0, &sock_union.storage, sizeof(sock_union.storage)); @@ -2714,7 +2715,7 @@ nlm4svc_init(xlator_t *nfsx) goto err; } - (void)gf_thread_create(&thr, NULL, nsm_thread, (void *)NULL, "nfsnsm"); + (void)gf_thread_create(&thr, NULL, nsm_thread, nfsx, "nfsnsm"); timeout.tv_sec = nlm_grace_period; timeout.tv_nsec = 0; diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c index d18b86ce8db..eaa7b916190 100644 --- a/xlators/nfs/server/src/nlmcbk_svc.c +++ b/xlators/nfs/server/src/nlmcbk_svc.c @@ -84,9 +84,14 @@ nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp) void * nsm_thread(void *argv) { + xlator_t *nfsx = argv; register SVCXPRT *transp; int ret = 0; + GF_ASSERT(nfsx); + + THIS = nfsx; + ret = pmap_unset(NLMCBK_PROGRAM, NLMCBK_V1); if (ret == 0) { gf_msg(GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_PMAP_UNSET_FAIL, diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 2fefaef8de8..9375d29c17f 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -133,23 +133,17 @@ ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode, return 0; } -int32_t +static gf_boolean_t ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) { - int8_t need_revalidate = 0; - struct timeval tv = { - 0, - }; ioc_table_t *table = NULL; + GF_ASSERT(ioc_inode); table = ioc_inode->table; + GF_ASSERT(table); - gettimeofday(&tv, NULL); - - if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout) - need_revalidate = 1; - - return need_revalidate; + return (gf_time() - ioc_inode->cache.last_revalidate >= + table->cache_timeout); } /* @@ -411,9 +405,6 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ioc_inode_t *ioc_inode = NULL; size_t destroy_size = 0; struct iatt *local_stbuf = NULL; - struct timeval tv = { - 0, - }; local = frame->local; ioc_inode = local->inode; @@ -451,10 +442,9 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) local_stbuf = NULL; - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1405,9 +1395,6 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; - struct timeval tv = { - 0, - }; inode_ctx_get(fd->inode, this, &tmp_inode); ioc_inode = (ioc_inode_t *)(long)tmp_inode; @@ -1418,10 +1405,9 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, return 0; } - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1955,9 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - if (ioc_inode->cache.tv.tv_sec) { - gf_time_fmt_tv(timestr, sizeof timestr, &ioc_inode->cache.tv, - gf_timefmt_FT); + if (ioc_inode->cache.last_revalidate) { + gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate, + gf_timefmt_FT); gf_proc_dump_write("last-cache-validation-time", "%s", timestr); } diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 4303c2fae13..14923c75edc 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -117,15 +117,13 @@ struct ioc_page { struct ioc_cache { rbthash_table_t *page_table; struct list_head page_lru; - time_t mtime; /* - * seconds component of file mtime - */ - time_t mtime_nsec; /* - * nanosecond component of file mtime - */ - struct timeval tv; /* - * time-stamp at last re-validate - */ + time_t mtime; /* + * seconds component of file mtime + */ + time_t mtime_nsec; /* + * nanosecond component of file mtime + */ + time_t last_revalidate; /* timestamp at last re-validate */ }; struct ioc_inode { @@ -270,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size, pthread_mutex_unlock(&page->page_lock); \ } while (0) -static inline uint64_t -time_elapsed(struct timeval *now, struct timeval *then) -{ - uint64_t sec = now->tv_sec - then->tv_sec; - - if (sec) - return sec; - - return 0; -} - ioc_inode_t * ioc_inode_search(ioc_table_t *table, inode_t *inode); diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index a8edbde23f2..84b1ae6cb20 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -413,9 +413,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_waitq_t *waitq = NULL; size_t iobref_page_size = 0; char zero_filled = 0; - struct timeval tv = { - 0, - }; GF_ASSERT(frame); @@ -431,7 +428,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0)); - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { if (op_ret == -1 || @@ -448,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; } - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); if (op_ret < 0) { /* error, readv returned -1 */ diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 6fa4d88389c..3d24cc97f4b 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -1016,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */ static void iot_apply_event(xlator_t *this, threshold_t *thresh) { - struct timespec now; - time_t delta; + time_t delta, now = gf_time(); /* Refresh for manual testing/debugging. It's cheap. */ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1); - timespec_now(&now); - if (thresh->value && thresh->update_time) { - delta = now.tv_sec - thresh->update_time; + delta = now - thresh->update_time; /* Be careful about underflow. */ if (thresh->value <= delta) { thresh->value = 0; @@ -1046,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh) kill(getpid(), SIGTRAP); } - thresh->update_time = now.tv_sec; + thresh->update_time = now; } static void * @@ -1311,7 +1308,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) /* Wait for draining stub from queue before notify PARENT_DOWN */ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); if (stub_cnt) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; pthread_mutex_lock(&conf->mutex); { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 1313c2b55c3..a405be51f02 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ -#include <glusterfs/timespec.h> #include <glusterfs/glusterfs.h> #include <glusterfs/defaults.h> #include <glusterfs/logging.h> @@ -33,8 +32,7 @@ struct mdc_statfs_cache { pthread_mutex_t lock; - gf_boolean_t initialized; - struct timespec last_refreshed; + time_t last_refreshed; /* (time_t)-1 if not yet initialized. */ struct statvfs buf; }; @@ -61,7 +59,7 @@ struct mdc_statistics { }; struct mdc_conf { - int timeout; + uint32_t timeout; gf_boolean_t cache_posix_acl; gf_boolean_t cache_glusterfs_acl; gf_boolean_t cache_selinux; @@ -376,10 +374,9 @@ unlock: static gf_boolean_t __is_cache_valid(xlator_t *this, time_t mdc_time) { - time_t now = 0; gf_boolean_t ret = _gf_true; struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; time_t last_child_down = 0; conf = this->private; @@ -393,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time) last_child_down = conf->last_child_down; timeout = conf->timeout; - time(&now); - if ((mdc_time == 0) || ((last_child_down != 0) && (mdc_time < last_child_down))) { ret = _gf_false; goto out; } - if (now >= (mdc_time + timeout)) { + if (gf_time() >= (mdc_time + timeout)) { ret = _gf_false; } @@ -581,10 +576,9 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, mdc_from_iatt(mdc, iatt); mdc->valid = _gf_true; if (update_time) { - time(&mdc->ia_time); - + mdc->ia_time = gf_time(); if (mdc->xa_time && update_xa_time) - time(&mdc->xa_time); + mdc->xa_time = mdc->ia_time; } gf_msg_callingfn( @@ -785,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict) if (newdict) mdc->xattr = newdict; - time(&mdc->xa_time); + mdc->xa_time = gf_time(); gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld", uuid_utoa(inode->gfid), (long long)mdc->xa_time); } @@ -1063,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf) pthread_mutex_lock(&conf->statfs_cache.lock); { memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs)); - clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed); - conf->statfs_cache.initialized = _gf_true; + conf->statfs_cache.last_refreshed = gf_time(); } pthread_mutex_unlock(&conf->statfs_cache.lock); } @@ -1073,8 +1066,7 @@ int mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) { struct mdc_conf *conf = this->private; - struct timespec now; - double cache_age = 0.0; + uint32_t cache_age = 0; int ret = 0; if (!buf || !conf) { @@ -1083,23 +1075,23 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) } *buf = NULL; - timespec_now(&now); pthread_mutex_lock(&conf->statfs_cache.lock); { - /* Skip if the cache is not initialized */ - if (!conf->statfs_cache.initialized) { + /* Skip if the cache is not initialized. */ + if (conf->statfs_cache.last_refreshed == (time_t)-1) { ret = -1; goto unlock; } - cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec); + cache_age = (gf_time() - conf->statfs_cache.last_refreshed); - gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age); + gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs", + cache_age); if (cache_age > conf->timeout) { - /* Expire the cache */ + /* Expire the cache. */ gf_log(this->name, GF_LOG_DEBUG, - "Cache age %lf exceeded timeout %d", cache_age, + "Cache age %u secs exceeded timeout %u secs", cache_age, conf->timeout); ret = -1; goto unlock; @@ -3616,7 +3608,7 @@ int mdc_reconfigure(xlator_t *this, dict_t *options) { struct mdc_conf *conf = NULL; - int timeout = 0; + int timeout = 0, ret = 0; char *tmp_str = NULL; conf = this->private; @@ -3656,7 +3648,10 @@ mdc_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out); GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out); - mdc_xattr_list_populate(conf, tmp_str); + + ret = mdc_xattr_list_populate(conf, tmp_str); + if (ret < 0) + goto out; /* If timeout is greater than 60s (default before the patch that added * cache invalidation support was added) then, cache invalidation @@ -3669,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options) } conf->timeout = timeout; - (void)mdc_register_xattr_inval(this); + ret = mdc_register_xattr_inval(this); out: - return 0; + return ret; } int32_t mdc_mem_acct_init(xlator_t *this) { - int ret = -1; - - ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1); - return ret; + return xlator_mem_acct_init(this, gf_mdc_mt_end + 1); } int mdc_init(xlator_t *this) { struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; char *tmp_str = NULL; conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t); @@ -3699,7 +3691,7 @@ mdc_init(xlator_t *this) LOCK_INIT(&conf->lock); - GF_OPTION_INIT("md-cache-timeout", timeout, int32, out); + GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out); GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out); @@ -3733,7 +3725,9 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out); mdc_xattr_list_populate(conf, tmp_str); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); + conf->statfs_cache.last_refreshed = (time_t)-1; + /* initialize gf_atomic_t counters */ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0); GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0); @@ -3764,7 +3758,7 @@ out: } void -mdc_update_child_down_time(xlator_t *this, time_t *now) +mdc_update_child_down_time(xlator_t *this, time_t now) { struct mdc_conf *conf = NULL; @@ -3772,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); } @@ -3782,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; struct mdc_conf *conf = NULL; - time_t now = 0; conf = this->private; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - mdc_update_child_down_time(this, &now); + mdc_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->mdc_invalidation) diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c index 03dedf8ea08..29b99b5b8ea 100644 --- a/xlators/performance/nl-cache/src/nl-cache-helper.c +++ b/xlators/performance/nl-cache/src/nl-cache-helper.c @@ -113,7 +113,7 @@ out: } void -nlc_update_child_down_time(xlator_t *this, time_t *now) +nlc_update_child_down_time(xlator_t *this, time_t now) { nlc_conf_t *conf = NULL; @@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); @@ -262,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) if (nlc_ctx->timer) { gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer, conf->cache_timeout); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); goto unlock; } @@ -496,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) nlc_ctx->timer_data = tmp; gf_tw_add_timer(conf->timer_wheel, timer); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); gf_msg_trace(this->name, 0, "Registering timer:%p, inode:%p, " "gfid:%s", diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c index cd0e1d195fd..33a7c471663 100644 --- a/xlators/performance/nl-cache/src/nl-cache.c +++ b/xlators/performance/nl-cache/src/nl-cache.c @@ -520,15 +520,13 @@ int nlc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; - time_t now = 0; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: case GF_EVENT_CHILD_UP: case GF_EVENT_SOME_DESCENDENT_UP: - time(&now); - nlc_update_child_down_time(this, &now); + nlc_update_child_down_time(this, gf_time()); /* TODO: nlc_clear_all_cache (this); else lru prune will lazily clear it*/ break; @@ -731,7 +729,7 @@ nlc_init(xlator_t *this) GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0); INIT_LIST_HEAD(&conf->lru); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!conf->timer_wheel) { diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h index 8b09972bb09..85fcc176342 100644 --- a/xlators/performance/nl-cache/src/nl-cache.h +++ b/xlators/performance/nl-cache/src/nl-cache.h @@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, loc_t *loc, loc_t *loc2); void -nlc_update_child_down_time(xlator_t *this, time_t *now); +nlc_update_child_down_time(xlator_t *this, time_t now); void nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason); diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index e43fe73bcca..600c3b62ffe 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -333,6 +333,15 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, return 0; } +static void +ob_open_destroy(call_stub_t *stub, fd_t *fd) +{ + stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +} + static int32_t ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, call_stub_t *stub) @@ -355,8 +364,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, if (stub != NULL) { if (closed) { - call_stub_destroy(stub); - fd_unref(fd); + ob_open_destroy(stub, fd); } else { call_resume(stub); } @@ -509,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, } static int32_t +ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + ob_inode_t *ob_inode; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + /* Create requests are never delayed. We always send them synchronously. */ + state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, + &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * so we simply forward the request synchronously. */ + return default_create(frame, this, loc, flags, mode, umask, fd, xdata); + } + + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, + xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } + + state = -ENOMEM; + } + + /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never + * be returned by ob_open_and_resume_fd(). If we are here it can only be + * because there has been a problem. */ + + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ + + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; + } + UNLOCK(&fd->inode->lock); + + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "create", "path=%s", loc->path, NULL); + + return default_create_failure_cbk(frame, -state); +} + +static int32_t ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { @@ -776,8 +834,7 @@ ob_fdclose(xlator_t *this, fd_t *fd) UNLOCK(&fd->inode->lock); if (stub != NULL) { - call_stub_destroy(stub); - fd_unref(fd); + ob_open_destroy(stub, fd); } ob_resume_pending(&list); @@ -940,6 +997,7 @@ fini(xlator_t *this) struct xlator_fops fops = { .open = ob_open, + .create = ob_create, .readv = ob_readv, .writev = ob_writev, .flush = ob_flush, diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 640c9ac51c6..7fe4b3c3a4b 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -421,9 +421,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_private_t *priv = NULL; qr_inode_table_t *table = NULL; uint32_t rollover = 0; - struct timeval tv = { - 0, - }; rollover = gen >> 32; gen = gen & 0xffffffff; @@ -431,7 +428,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, priv = this->private; table = &priv->table; - gettimeofday(&tv, NULL); LOCK(&table->lock); { if ((rollover != qr_inode->gen_rollover) || @@ -453,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; qr_inode->buf = *buf; - - memcpy(&qr_inode->last_refresh, &tv, sizeof(struct timeval)); + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } @@ -524,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { qr_inode->buf = *buf; - - gettimeofday(&qr_inode->last_refresh, NULL); - + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } else { __qr_inode_prune(this, table, qr_inode, gen); @@ -558,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) { qr_conf_t *conf = NULL; qr_private_t *priv = NULL; - struct timeval now; - struct timeval diff; priv = this->private; conf = &priv->conf; - gettimeofday(&now, NULL); - - timersub(&now, &qr_inode->last_refresh, &diff); - - if (qr_inode->last_refresh.tv_sec < priv->last_child_down) + if (qr_inode->last_refresh < priv->last_child_down) return _gf_false; - if (diff.tv_sec >= conf->cache_timeout) + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) return _gf_false; return _gf_true; @@ -1049,9 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) gf_proc_dump_write("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); - if (qr_inode->last_refresh.tv_sec) { - gf_time_fmt_tv(buf, sizeof buf, &qr_inode->last_refresh, gf_timefmt_FT); - + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); gf_proc_dump_write("last-cache-validation-time", "%s", buf); } @@ -1404,7 +1390,7 @@ qr_init(xlator_t *this) ret = 0; - time(&priv->last_child_down); + priv->last_child_down = gf_time(); GF_ATOMIC_INIT(priv->generation, 0); this->private = priv; out: @@ -1454,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf) } void -qr_update_child_down_time(xlator_t *this, time_t *now) +qr_update_child_down_time(xlator_t *this, time_t now) { qr_private_t *priv = NULL; @@ -1462,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now) LOCK(&priv->lock); { - priv->last_child_down = *now; + priv->last_child_down = now; } UNLOCK(&priv->lock); } @@ -1508,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; qr_private_t *priv = NULL; - time_t now = 0; qr_conf_t *conf = NULL; priv = this->private; @@ -1517,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...) switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - qr_update_child_down_time(this, &now); + qr_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->qr_invalidation) diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index 67850821b8e..20fcc70b3a7 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -39,7 +39,7 @@ struct qr_inode { uint32_t ia_ctime_nsec; uint32_t gen_rollover; struct iatt buf; - struct timeval last_refresh; + time_t last_refresh; struct list_head lru; uint64_t gen; uint64_t invalidation_time; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 48f4d0a7f87..721968004a0 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -267,6 +267,8 @@ server_priv(xlator_t *this) gf_proc_dump_build_key(key, "server", "total-bytes-write"); gf_proc_dump_write(key, "%" PRIu64, total_write); + rpcsvc_statedump(conf->rpc); + ret = 0; out: if (ret) diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 609ddea2560..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -140,6 +140,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) struct timespec sleep_till = { 0, }; + glusterfs_ctx_t *ctx = this->ctx; switch (event) { case GF_EVENT_PARENT_UP: { @@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_PARENT_DOWN: { if (!victim->cleanup_starting) break; - gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", - victim->name); if (priv->janitor) { pthread_mutex_lock(&priv->janitor_mutex); @@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); if (!ret) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait to set janitor_task flag to _gf_false by * janitor_task_done */ @@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) (void)pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_mutex, &sleep_till); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; } } @@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) GF_FREE(priv->janitor); } priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); } break; default: @@ -1084,7 +1093,13 @@ posix_init(xlator_t *this) pthread_cond_init(&_private->fsync_cond, NULL); pthread_mutex_init(&_private->janitor_mutex, NULL); pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) + goto out; + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, "posixfsy"); if (ret) { @@ -1197,6 +1212,8 @@ posix_fini(xlator_t *this) { struct posix_private *priv = this->private; gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; int ret = 0; int i = 0; @@ -1243,6 +1260,19 @@ posix_fini(xlator_t *this) priv->janitor = NULL; } + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index bb4a5309f45..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -1505,7 +1505,7 @@ posix_janitor_task(void *data) if (!priv) goto out; - time(&now); + now = gf_time(); if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { if (priv->disable_landfill_purge) { gf_msg_debug(this->name, 0, @@ -1592,16 +1592,108 @@ unlock: return; } +static struct posix_fd * +janitor_get_next_fd(glusterfs_ctx_t *ctx) +{ + struct posix_fd *pfd = NULL; + + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; + } + + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } + + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); + + return pfd; +} + +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) +{ + THIS = xl; + + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); + } + + GF_FREE(pfd); +} + +static void * +posix_ctx_janitor_thread_proc(void *data) +{ + xlator_t *xl; + struct posix_fd *pfd; + glusterfs_ctx_t *ctx = NULL; + struct posix_private *priv_fd; + + ctx = data; + + pthread_mutex_lock(&ctx->fd_lock); + + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); + } + + pthread_mutex_unlock(&ctx->fd_lock); + + return NULL; +} + +int +posix_spawn_ctx_janitor_thread(xlator_t *this) +{ + int ret = 0; + glusterfs_ctx_t *ctx = NULL; + + ctx = this->ctx; + + pthread_mutex_lock(&ctx->fd_lock); + { + if (ctx->pxl_count++ == 0) { + ret = gf_thread_create(&ctx->janitor, NULL, + posix_ctx_janitor_thread_proc, ctx, + "posixctxjan"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor thread failed"); + ctx->pxl_count--; + } + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + return ret; +} + static int -is_fresh_file(int64_t sec, int64_t ns) +is_fresh_file(struct timespec *ts) { - struct timeval tv; + struct timespec now; int64_t elapsed; - gettimeofday(&tv, NULL); + timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - elapsed = (tv.tv_sec - sec) * 1000000L; - elapsed += tv.tv_usec - (ns / 1000L); if (elapsed < 0) { /* The file has been modified in the future !!! * Is it fresh ? previous implementation considered this as a @@ -1610,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns) } /* If the file is newer than a second, we consider it fresh. */ - if (elapsed < 1000000) { - return 1; - } - - return 0; + return elapsed < 1000000; } int @@ -1677,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. */ - if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) { + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1691,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. */ - if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) { + if (is_fresh_file(&stat.st_ctim)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1950,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) goto out; } - time_sec = time(NULL); + time_sec = gf_time(); gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); timelen = strlen(timestamp); diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 762041b5831..6d54d37e5aa 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -1361,6 +1361,22 @@ out: return 0; } +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + int32_t posix_releasedir(xlator_t *this, fd_t *fd) { @@ -1383,11 +1399,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) "pfd->dir is NULL for fd=%p", fd); goto out; } - - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_closedir(pfd->dir); - GF_FREE(pfd); + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -2510,7 +2522,6 @@ out: int32_t posix_release(xlator_t *this, fd_t *fd) { - struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; @@ -2518,8 +2529,6 @@ posix_release(xlator_t *this, fd_t *fd) VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); - priv = this->private; - ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, @@ -2533,13 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd) "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_close(pfd->fd); - GF_FREE(pfd); - - if (!priv) - goto out; + posix_add_fd_to_cleanup(this, pfd); out: return 0; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 35be197c869..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -125,7 +125,7 @@ struct posix_fd { off_t dir_eof; /* offset at dir EOF */ struct list_head list; /* to add to the janitor list */ int odirect; - + xlator_t *xl; char _pad[4]; /* manual padding */ }; @@ -137,10 +137,6 @@ struct posix_private { gf_lock_t lock; char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; time_t last_landfill_check; @@ -170,6 +166,7 @@ struct posix_private { pthread_cond_t fsync_cond; pthread_mutex_t janitor_mutex; pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; int fsync_queue_count; int32_t janitor_sleep_duration; @@ -254,8 +251,7 @@ struct posix_private { gf_boolean_t aio_configured; gf_boolean_t aio_init_done; gf_boolean_t aio_capable; - - char _pad[4]; /* manual padding */ + uint32_t rel_fdcount; }; typedef struct { @@ -662,6 +658,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, int posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); +int +posix_spawn_ctx_janitor_thread(xlator_t *this); + void posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); |