diff options
48 files changed, 2042 insertions, 786 deletions
diff --git a/.testignore b/.testignore index 1784aff629a..fe8f838bf2b 100644 --- a/.testignore +++ b/.testignore @@ -8,7 +8,7 @@ rfc.sh submit-for-review.sh AUTHORS -CONTRIBUTING +CONTRIBUTING.md COPYING-GPLV2 COPYING-LGPLV3 ChangeLog diff --git a/CONTRIBUTING b/CONTRIBUTING deleted file mode 100644 index eb30d76d1b4..00000000000 --- a/CONTRIBUTING +++ /dev/null @@ -1,30 +0,0 @@ -# GlusterFS project Contribution guidelines - - -## By contributing to this project, the contributor would need to agree to below. - -### Developer's Certificate of Origin 1.1 - -By making a contribution to this project, I certify that: - -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..65fc3497104 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,114 @@ +# GlusterFS project Contribution guidelines + +## Development Workflow + +We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here. + + +#### Get the Repository setup + +0. Fork Repository + - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork). + +1. Clone Repository + - Clone the glusterfs repo freshly from github using below steps. + +``` + git clone git@github.com:${username}/glusterfs.git + cd glusterfs/ + git remote add upstream git@github.com:gluster/glusterfs.git +``` + +About two tasks are one time for the life time. You can continue to use the same repository for all the work in future. + +#### Development & Other flows + +0. Issue: + - Make sure there is an issue filed for the task you are working on. + - If it is not filed, open the issue with all the description. + - If it is a bug fix, add label "Type:Bug". + - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label. + +1. Code: + - Start coding + - Build and test locally + - Make sure clang-format is installed and is run on the patch. + +2. Keep up-to-date + - GlusterFS is a large project with many developers, so there would be one or the other patch everyday. + - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened. + - Git provides many options to keep up-to-date, below is one of them +``` + git fetch upstream + git rebase upstream/devel +``` + - It is recommended you keep pushing to your repo every day, so you don't loose any work. + - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`) + +2. Commit Message / PR description: + - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice. + - PRs continue to have the title of format "component: \<title\>", like it is practiced now. + - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS. + - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message. + - Here, NNNN is the Issue ID in glusterfs repository. + - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line. + - Can do this by `-s` option for `git commit`. + - If the PR is not ready for review, apply the label `work-in-progress`. + - Check the availability of "Draft PR" is present for you, if yes, use that instead. + +3. Tests: + - All the required smoke tests would be auto-triggered. + - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment. + - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group. + - Ask for help as comment in PR if you have any questions about the process! + +4. Review Process: + - `+2` : is equivalent to "Approve" from the people in the maintainer's group. + - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment. + - `-1` : provide details on required changes and pick "Request Changes" while submitting your review. + - `-2` : done by adding the `DO-NOT-MERGE` label. + + - Any further discussions can happen as comments in the PR. + +5. Making changes: + - There are 2 approaches to submit changes done after addressing review comments. + - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN) + - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option. + +6. Merging: + - GlusterFS project follows 'Squash and Merge' method + - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link. + - This also makes every merge a complete patch, which has passed all tests. + - The merging of the patch is expected to be done by the maintainers. + - It can be done when all the tests (smoke and regression) pass. + - When the PR has 'Approved' flag from corresponding maintainer. + - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email. + +## By contributing to this project, the contributor would need to agree to below. + +### Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + diff --git a/MAINTAINERS b/MAINTAINERS index cfe075fa17d..953e8755fd9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -53,7 +53,7 @@ General Project Architects -------------------------- M: Amar Tumballi <amarts@gmail.com> M: Xavier Hernandez <xhernandez@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Atin Mukherjee <amukherj@redhat.com> xlators: @@ -66,12 +66,12 @@ F: xlators/system/posix-acl/ Arbiter M: Ravishankar N <ravishankar@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> S: Maintained F: xlators/features/arbiter/ Automatic File Replication (AFR) -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> M: Ravishankar N <ravishankar@redhat.com> P: Karthik US <ksubrahm@redhat.com> S: Maintained @@ -101,7 +101,7 @@ S: Maintained F: xlators/cluster/dht/ Erasure Coding -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> M: Xavier Hernandez <xhernandez@redhat.com> P: Ashish Pandey <aspandey@redhat.com> S: Maintained @@ -119,13 +119,13 @@ S: Maintained F: xlators/mount/ Index -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Ravishankar N <ravishankar@redhat.com> S: Maintained F: xlators/features/index/ IO Cache -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/performance/io-cache/ @@ -136,7 +136,7 @@ S: Maintained F: xlators/debug/io-stats/ IO threads -M: Pranith Karampuri <pkarampu@redhat.com> +M: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Ravishankar N <ravishankar@redhat.com> S: Maintained F: xlators/performance/io-threads/ @@ -160,7 +160,7 @@ S: Maintained F: xlators/features/marker/ Meta -M: Mohammed Rafi KC <rkavunga@redhat.com> +M: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/features/meta/ @@ -172,7 +172,7 @@ F: xlators/performance/md-cache/ Negative-lookup Cache M: Poornima G <pgurusid@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> S: Maintained F: xlators/performance/nl-cache/ @@ -282,7 +282,7 @@ M: Xavier Hernandez <xhernandez@redhat.com> M: Jeff Darcy <jeff@pl.atyp.us> P: Kaleb Keithley <kkeithle@redhat.com> P: Niels de Vos <ndevos@redhat.com> -P: Pranith Karampuri <pkarampu@redhat.com> +P: Pranith Karampuri <pranith.karampuri@phonepe.com> P: Shyamsundar Ranganathan <srangana@redhat.com> S: Maintained F: libglusterfs/ @@ -305,7 +305,7 @@ F: xlators/mgmt/glusterd/ Protocol M: Niels de Vos <ndevos@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> S: Maintained F: xlators/protocol/ @@ -317,7 +317,7 @@ F: rpc/xdr/ Snapshot M: Raghavendra Bhat <raghavendra@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> P: Sunny Kumar <sunkumar@redhat.com> S: Maintained F: xlators/mgmt/glusterd/src/glusterd-snap* @@ -326,7 +326,7 @@ F: extras/snap-scheduler.py Socket subsystem P: Krutika Dhananjay <kdhananj@redhat.com> P: Milind Changire <mchangir@redhat.com> -P: Mohammed Rafi KC <rkavunga@redhat.com> +P: Mohammed Rafi KC <rafi.kavungal@iternity.com> P: Mohit Agrawal <moagrawa@redhat.com> S: Maintained F: rpc/rpc-transport/socket/ diff --git a/README.md b/README.md index 92f829e431e..9d68e033782 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ petabytes. It provides interfaces for object, block and file storage. ## Development - Contributions to gluster in the form of patches and new feature additions can - be made by following steps outlined at [Developers Guide](http://docs.gluster.org/en/latest/Developer-guide/Developers-Index/#contributing-to-the-gluster-community). + The development workflow is documented in [Contributors guide](CONTRIBUTING.md) ## Documentation The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org). diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c index 08df2b1a4b7..7c82b8cd162 100644 --- a/api/src/glfs-mgmt.c +++ b/api/src/glfs-mgmt.c @@ -1013,11 +1013,6 @@ glfs_mgmt_init(struct glfs *fs) if (ret) goto out; - if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) { - ctx->secure_mgmt = 1; - ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); - } - rpc = rpc_clnt_new(options, THIS, THIS->name, 8); if (!rpc) { ret = -1; diff --git a/api/src/glfs.c b/api/src/glfs.c index e4e6ad69fd0..b4bf1423f6d 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -251,6 +251,11 @@ glfs_volumes_init(struct glfs *fs) if (!vol_assigned(cmd_args)) return -1; + if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) { + fs->ctx->secure_mgmt = 1; + fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); + } + if (cmd_args->volfile_server) { ret = glfs_mgmt_init(fs); goto out; diff --git a/configure.ac b/configure.ac index 4efddf4e38b..e2d6fd66cec 100644 --- a/configure.ac +++ b/configure.ac @@ -275,7 +275,7 @@ AC_ARG_ENABLE([debug], [Enable debug build options.])) if test "x$enable_debug" = "xyes"; then BUILD_DEBUG=yes - GF_CFLAGS="${GF_CFLAGS} -g -rdynamic -O0 -DDEBUG" + GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG" else BUILD_DEBUG=no fi @@ -4,7 +4,29 @@ # i.e. where we are interested in the result of a command, # we have to run the command in an if-statement. -ORIGIN=${GLUSTER_ORIGIN:-origin} +UPSTREAM=${GLUSTER_UPSTREAM} +if [ "x$UPSTREAM" -eq "x" ]; then + for rmt in $(git remote); do + rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }'); + if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then + UPSTREAM=$rmt + echo "Picked $rmt as upstream remote" + break + fi + done +fi + +USER_REPO=${GLUSTER_USER_REPO:-origin} +if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then + echo "When you submit patches, it should get submitted to your fork, not to upstream directly" + echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`" + echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`" + echo "" + echo "Exiting..." + exit 1 +fi + + while getopts "v" opt; do case $opt in @@ -18,7 +40,7 @@ done shift $((OPTIND-1)) -branch="master"; +branch="devel"; set_hooks_commit_msg() { @@ -50,21 +72,21 @@ is_num() backport_id_message() { echo "" - echo "This commit is to a non-master branch, and hence is treated as a backport." + echo "This commit is to a non-devel branch, and hence is treated as a backport." echo "" echo "For backports we would like to retain the same gerrit Change-Id across" echo "branches. On auto inspection it is found that a gerrit Change-Id is" - echo "missing, or the Change-Id is not found on your local master" + echo "missing, or the Change-Id is not found on your local devel branch" echo "" echo "This could mean a few things:" echo " 1. This is not a backport, hence choose Y on the prompt to proceed" - echo " 2. Your $ORIGIN master is not up to date, hence the script is unable" - echo " to find the corresponding Change-Id on master. Either choose N," + echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable" + echo " to find the corresponding Change-Id on devel. Either choose N," echo " 'git fetch', and try again, OR if you are sure you used the" echo " same Change-Id, choose Y at the prompt to proceed" echo " 3. You commented or removed the Change-Id in your commit message after" echo " cherry picking the commit. Choose N, fix the commit message to" - echo " use the same Change-Id as master (git commit --amend), resubmit" + echo " use the same Change-Id as 'devel' (git commit --amend), resubmit" echo "" } @@ -72,8 +94,8 @@ check_backport() { moveon='N' - # Backports are never made to master - if [ $branch = "master" ]; then + # Backports are never made to 'devel' + if [ $branch = "devel" ]; then return; fi @@ -86,22 +108,22 @@ check_backport() echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): " read moveon else - # Search master for the same change ID (rebase_changes has run, so we + # Search 'devel' for the same change ID (rebase_changes has run, so we # should never not find a Change-Id) - mchangeid=$(git log $ORIGIN/master --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}') + mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}') - # Check if we found the change ID on master, else throw a message to + # Check if we found the change ID on 'devel', else throw a message to # decide if we should continue. - # NOTE: If master was not rebased, we will not find the Change-ID and + # NOTE: If 'devel' was not rebased, we will not find the Change-ID and # could hit a false positive case here (or if someone checks out some - # other branch as master). + # other branch as 'devel'). if [ "${mchangeid}" = "${changeid}" ]; then moveon="Y" else backport_id_message; echo "Change-Id of commit: $changeid" - echo "Change-Id on master: $mchangeid" - echo -n "Did not find mentioned Change-Id on master for a possible backport. Continue (y/N): " + echo "Change-Id on devel: $mchangeid" + echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): " read moveon fi fi @@ -116,7 +138,7 @@ check_backport() rebase_changes() { - GIT_EDITOR=$0 git rebase -i $ORIGIN/$branch; + GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch; } @@ -212,7 +234,7 @@ EOF assert_diverge() { - git diff $ORIGIN/$branch..HEAD | grep -q .; + git diff $UPSTREAM/$branch..HEAD | grep -q .; } @@ -258,7 +280,7 @@ main() return; fi - git fetch $ORIGIN; + git fetch $UPSTREAM; rebase_changes; @@ -269,9 +291,9 @@ main() # see note above variable REFRE for regex elaboration reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}'); - # If this is a commit against master and does not have a github + # If this is a commit against 'devel' and does not have a github # issue reference. Warn the contributor that one of the 2 is required - if [ -z "${reference}" ] && [ $branch = "master" ]; then + if [ -z "${reference}" ] && [ $branch = "devel" ]; then warn_reference_missing; fi @@ -299,9 +321,9 @@ main() fi if [ -z "${reference}" ]; then - $drier git push $ORIGIN HEAD:refs/for/$branch/rfc; + $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s); else - $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference}; + $drier git push $USER_REPO HEAD:issue${reference}_${branch}; fi } diff --git a/tests/afr.rc b/tests/afr.rc index 5fc7fa1898d..241789903ba 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -115,3 +115,9 @@ function afr_private_key_value() #xargs at the end will strip leading spaces grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs } + +function afr_anon_entry_count() +{ + local b=$1 + ls $b/.glusterfs-anonymous-inode* | wc -l +} diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t new file mode 100644 index 00000000000..896ba0c9b2c --- /dev/null +++ b/tests/basic/afr/afr-anon-inode-no-quorum.t @@ -0,0 +1,63 @@ +#!/bin/bash + +#Test that anon-inode entry is not cleaned up as long as there exists at least +#one valid entry +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST touch $M0/a $M0/b + +gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) +gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/a $M0/a-new +TEST mv $M0/b $M0/b-new + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! ls $M0/a +TEST ! ls $M0/b +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +#Make sure index heal doesn't happen after enabling heal +TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 +TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +#Allow time for a scan +sleep 5 +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) +TEST rm -f $M0/a-new +TEST stat $M0/b-new + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new + +cleanup diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t new file mode 100644 index 00000000000..f4cf37a2fa0 --- /dev/null +++ b/tests/basic/afr/afr-anon-inode.t @@ -0,0 +1,114 @@ +#!/bin/bash +#Tests that afr-anon-inode test cases work fine as expected +#These are cases where in entry-heal/name-heal we dont know entry for an inode +#so these inodes are kept in a special directory + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode no +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode yes +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST mkdir -p $M0/d1/b $M0/d2/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/d2/a $M0/d1 +TEST mv $M0/d1/b $M0/d2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/${V0}1/$anon_inode_name ]] +TEST [[ -d $B0/${V0}2/$anon_inode_name ]] +anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name + +TEST ! ls $M0/$anon_inode_name +EXPECT "^4$" echo $(ls -a $M0 | wc -l) + +#Test purging code path by shd +TEST $CLI volume heal $V0 disable +TEST mkdir $M0/l0 $M0/l1 $M0/l2 +TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file +TEST ln $M0/del-file $M0/del-file-link +TEST ln $M0/l0/file $M0/l1/file-link1 +TEST ln $M0/l0/file $M0/l2/file-link2 +TEST mkdir -p $M0/del-recursive-dir/d1 + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST rm -f $M0/del-file $M0/del-file-nolink +TEST rm -rf $M0/del-recursive-dir +TEST mv $M0/d1/a $M0/d2 +TEST mv $M0/l0/file $M0/l0/renamed-file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 + +nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) +link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) +dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) +rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) +rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) +TEST ! stat $M0/del-file +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST ! stat $M0/del-file-nolink +TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid +TEST ! stat $M0/del-recursive-dir +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +TEST ! stat $M0/d1/a +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST ! stat $M0/l0/file +TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +TEST ! stat $M0/l1/file-link1 +TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 +TEST ! stat $M0/l2/file-link2 +TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid + +#Simulate only anon-inodes present in all bricks +TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 + +#Test that shd doesn't cleanup anon-inodes when some bricks are down +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +$CLI volume heal $V0 +sleep 5 #Allow time for completion of one scan +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 + +#Test that rename indeed happened instead of rmdir/mkdir +renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) +EXPECT "$rename_dir_inum" echo $renamed_dir_inum +cleanup; diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t new file mode 100644 index 00000000000..7bb6ee14193 --- /dev/null +++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t @@ -0,0 +1,459 @@ +#!/bin/bash + +#This file checks if missing entry self-heal and entry self-heal are working +#as expected. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_file_type { + stat -c "%a:%F:%g:%t:%T:%u" $1 +} + +function diff_dirs { + diff <(ls $1 | sort) <(ls $2 | sort) +} + +function heal_status { + local f1_path="${1}/${3}" + local f2_path="${2}/${3}" + local insync="" + diff_dirs $f1_path $f2_path + if [ $? -eq 0 ]; + then + insync="Y" + else + insync="N" + fi + local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) + local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) + local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) + local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) + local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) + local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) + if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi + if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi + if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi + if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi + if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi + if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi + echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} +} + +function is_heal_done { + local zero_xattr="000000000000000000000000" + if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; + then + echo "Y" + else + echo "N" + fi +} + +function print_pending_heals { + local result=":" + for i in "$@"; + do + if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; + then + result="$result:$i" + fi + done +#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names + if [ $result == ":" ]; then result="~"; fi + echo $result +} + +zero_xattr="000000000000000000000000" +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.use-anonymous-inode off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 +cd $M0 +#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens +#spb is split-brain, fool is all fool + +#source_self_accusing means there exists source and a sink which self-accuses. +#This simulates failures where fops failed on the bricks without it going down. +#Something like EACCESS/EDQUOT etc + +TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing +TEST mkfifo source_deletions_heal/fifo +TEST mknod source_deletions_heal/block b 4 5 +TEST mknod source_deletions_heal/char c 1 5 +TEST touch source_deletions_heal/file +TEST ln -s source_deletions_heal/file source_deletions_heal/slink +TEST mkdir source_deletions_heal/dir1 +TEST mkdir source_deletions_heal/dir1/dir2 + +TEST mkfifo source_deletions_me/fifo +TEST mknod source_deletions_me/block b 4 5 +TEST mknod source_deletions_me/char c 1 5 +TEST touch source_deletions_me/file +TEST ln -s source_deletions_me/file source_deletions_me/slink +TEST mkdir source_deletions_me/dir1 +TEST mkdir source_deletions_me/dir1/dir2 + +TEST mkfifo source_self_accusing/fifo +TEST mknod source_self_accusing/block b 4 5 +TEST mknod source_self_accusing/char c 1 5 +TEST touch source_self_accusing/file +TEST ln -s source_self_accusing/file source_self_accusing/slink +TEST mkdir source_self_accusing/dir1 +TEST mkdir source_self_accusing/dir1/dir2 + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 +TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 +TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 +TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 + +#Test that the files are deleted +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/slink +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/slink +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/slink +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + + +TEST mkfifo source_creations_heal/fifo +TEST mknod source_creations_heal/block b 4 5 +TEST mknod source_creations_heal/char c 1 5 +TEST touch source_creations_heal/file +TEST ln -s source_creations_heal/file source_creations_heal/slink +TEST mkdir source_creations_heal/dir1 +TEST mkdir source_creations_heal/dir1/dir2 + +TEST mkfifo source_creations_me/fifo +TEST mknod source_creations_me/block b 4 5 +TEST mknod source_creations_me/char c 1 5 +TEST touch source_creations_me/file +TEST ln -s source_creations_me/file source_creations_me/slink +TEST mkdir source_creations_me/dir1 +TEST mkdir source_creations_me/dir1/dir2 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#simulate self-accusing for source_self_accusing +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +# Check if conservative merges happened correctly on _me_ dirs +TEST stat spb_me_heal/1 +TEST stat $B0/${V0}0/spb_me_heal/1 +TEST stat $B0/${V0}1/spb_me_heal/1 + +TEST stat spb_me_heal/0 +TEST stat $B0/${V0}0/spb_me_heal/0 +TEST stat $B0/${V0}1/spb_me_heal/0 + +TEST stat fool_me/1 +TEST stat $B0/${V0}0/fool_me/1 +TEST stat $B0/${V0}1/fool_me/1 + +TEST stat fool_me/0 +TEST stat $B0/${V0}0/fool_me/0 +TEST stat $B0/${V0}1/fool_me/0 + +TEST stat v1_fool_me/0 +TEST stat $B0/${V0}0/v1_fool_me/0 +TEST stat $B0/${V0}1/v1_fool_me/0 + +TEST stat v1_fool_me/1 +TEST stat $B0/${V0}0/v1_fool_me/1 +TEST stat $B0/${V0}1/v1_fool_me/1 + +TEST stat v1_dirty_me/0 +TEST stat $B0/${V0}0/v1_dirty_me/0 +TEST stat $B0/${V0}1/v1_dirty_me/0 + +#Check if files that have gfid-mismatches in _me_ are giving EIO +TEST ! stat spb_me/0 + +#Check if stale files are deleted on access +TEST ! stat source_deletions_me/fifo +TEST ! stat $B0/${V0}0/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat source_deletions_me/block +TEST ! stat $B0/${V0}0/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat source_deletions_me/char +TEST ! stat $B0/${V0}0/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 +TEST ! stat source_deletions_me/dir1 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 + +#Test if the files created as part of access are healed correctly +r=$(get_file_type source_creations_me/fifo) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo +TEST [ -p source_creations_me/fifo ] + +r=$(get_file_type source_creations_me/block) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block +TEST [ -b source_creations_me/block ] + +r=$(get_file_type source_creations_me/char) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char +TEST [ -c source_creations_me/char ] + +r=$(get_file_type source_creations_me/file) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file +TEST [ -f source_creations_me/file ] + +r=$(get_file_type source_creations_me/slink) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink +TEST [ -h source_creations_me/slink ] + +r=$(get_file_type source_creations_me/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 +TEST [ -d source_creations_me/dir1/dir2 ] + +r=$(get_file_type source_creations_me/dir1) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 +TEST [ -d source_creations_me/dir1 ] + +#Trigger heal and check _heal dirs are healed properly +#Trigger change in event generation number. That way inodes would get refreshed during lookup +TEST kill_brick $V0 $H0 $B0/${V0}1 +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +TEST stat spb_heal +TEST stat spb_me_heal +TEST stat fool_heal +TEST stat fool_me +TEST stat v1_fool_heal +TEST stat v1_fool_me +TEST stat source_deletions_heal +TEST stat source_deletions_me +TEST stat source_self_accusing +TEST stat source_creations_heal +TEST stat source_creations_me +TEST stat v1_dirty_heal +TEST stat v1_dirty_me +TEST $CLI volume stop $V0 +TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 + +TEST $CLI volume stop $V0; + +#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal +create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +$CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0; +EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing + +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me + +#Don't access the files/dirs from mount point as that may cause self-heals +# Check if conservative merges happened correctly on heal dirs +TEST stat $B0/${V0}0/spb_heal/1 +TEST stat $B0/${V0}1/spb_heal/1 + +TEST stat $B0/${V0}0/spb_heal/0 +TEST stat $B0/${V0}1/spb_heal/0 + +TEST stat $B0/${V0}0/fool_heal/1 +TEST stat $B0/${V0}1/fool_heal/1 + +TEST stat $B0/${V0}0/fool_heal/0 +TEST stat $B0/${V0}1/fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/0 +TEST stat $B0/${V0}1/v1_fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/1 +TEST stat $B0/${V0}1/v1_fool_heal/1 + +TEST stat $B0/${V0}0/v1_dirty_heal/0 +TEST stat $B0/${V0}1/v1_dirty_heal/0 + +#Check if files that have gfid-mismatches in spb are giving EIO +TEST ! stat spb/0 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}0/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}0/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}0/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}0/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}0/source_self_accusing/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + +#Test if the files created as part of full self-heal correctly +r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo +TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/block) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/char) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char + +r=$(get_file_type $B0/${V0}0/source_creations_heal/file) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file +TEST [ -f $B0/${V0}0/source_creations_heal/file ] + +r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink +TEST [ -h $B0/${V0}0/source_creations_heal/slink ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] + +cd - + +#Anonymous directory shouldn't be created +TEST mkdir $M0/rename-dir +before_rename=$(STAT_INO $B0/${V0}1/rename-dir) +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/rename-dir $M0/new-name +TEST $CLI volume start $V0 force +#'spb' is in split-brain so pending-heal-count will be 2 +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 +after_rename=$(STAT_INO $B0/${V0}1/new-name) +EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) +EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) +EXPECT_NOT "$before_rename" echo $after_rename +cleanup diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t new file mode 100644 index 00000000000..256ee2aafce --- /dev/null +++ b/tests/basic/afr/rename-data-loss.t @@ -0,0 +1,72 @@ +#!/bin/bash +#Self-heal tests +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +cd $M0 +TEST `echo "line1" >> file1` +TEST mkdir dir1 +TEST mkdir dir2 +TEST mkdir -p dir1/dira/dirb +TEST `echo "line1">>dir1/dira/dirb/file1` +TEST mkdir delete_me +TEST `echo "line1" >> delete_me/file1` + +#brick0 has witnessed the second write while brick1 is down. +TEST kill_brick $V0 $H0 $B0/brick1 +TEST `echo "line2" >> file1` +TEST `echo "line2" >> dir1/dira/dirb/file1` +TEST `echo "line2" >> delete_me/file1` + +#Toggle the bricks that are up/down. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/brick0 + +#Rename when the 'source' brick0 for data-selfheals is down. +mv file1 file2 +mv dir1/dira dir2 + +#Delete a dir when brick0 is down. +rm -rf delete_me +cd - + +#Bring everything up and trigger heal +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 + +#Remount to avoid reading from caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "line2" tail -1 $M0/file2 +EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 +TEST ! stat $M0/delete_me/file1 +TEST ! stat $M0/delete_me + +anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/brick0/$anon_inode_name ]] +TEST [[ -d $B0/brick1/$anon_inode_name ]] +cleanup diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t new file mode 100644 index 00000000000..6ec3e1fcbfa --- /dev/null +++ b/tests/basic/fuse/active-io-graph-switch.t @@ -0,0 +1,65 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +TESTS_EXPECTED_IN_LOOP=12 + +function perform_io_on_mount { + local m="$1" + local f="$2" + local lockfile="$3" + while [ -f "$m/$lockfile" ]; + do + dd if=/dev/zero of=$m/$f bs=1M count=1 + done +} + +function perform_graph_switch { + for i in {1..3} + do + TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off + sleep 3 + TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on + sleep 3 + done +} + +function count_files { + ls $M0 | wc -l +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 flush-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST touch $M0/lock +for i in {1..100}; do perform_io_on_mount $M0 $i lock & done +EXPECT_WITHIN 5 "101" count_files + +perform_graph_switch +TEST rm -f $M0/lock +wait +EXPECT "100" count_files +TEST rm -f $M0/{1..100} +EXPECT "0" count_files + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +#Repeat the tests with reader-thread-count +TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST touch $M0/lock +for i in {1..100}; do perform_io_on_mount $M0 $i lock & done +EXPECT_WITHIN 5 "101" count_files + +perform_graph_switch +TEST rm -f $M0/lock +wait +EXPECT "100" count_files +TEST rm -f $M0/{1..100} +EXPECT "0" count_files + +cleanup diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c new file mode 100644 index 00000000000..7beb8dd1fe4 --- /dev/null +++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c @@ -0,0 +1,127 @@ +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> + +#define LOG_ERR(msg) \ + do { \ + fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ + } while (0) + +glfs_t * +init_glfs(const char *hostname, const char *volname, const char *volfile, + const char *logfile) +{ + int ret = -1; + glfs_t *fs = NULL; + + fs = glfs_new(volname); + if (!fs) { + LOG_ERR("glfs_new failed"); + return NULL; + } + + ret = glfs_set_volfile(fs, volfile); + if (ret < 0) { + LOG_ERR("glfs_set_volfile failed"); + goto out; + } + + ret = glfs_set_logging(fs, logfile, 7); + if (ret < 0) { + LOG_ERR("glfs_set_logging failed"); + goto out; + } + + ret = glfs_init(fs); + if (ret < 0) { + LOG_ERR("glfs_init failed"); + goto out; + } + + ret = 0; +out: + if (ret) { + glfs_fini(fs); + fs = NULL; + } + + return fs; +} + +int +glfs_test_function(const char *hostname, const char *volname, + const char *volfile, const char *logfile) +{ + int ret = -1; + int flags = O_CREAT | O_RDWR; + glfs_t *fs = NULL; + glfs_fd_t *glfd = NULL; + const char *buff = "This is from my prog\n"; + const char *filename = "glfs_test.txt"; + + fs = init_glfs(hostname, volname, volfile, logfile); + if (fs == NULL) { + LOG_ERR("init_glfs failed"); + return -1; + } + + glfd = glfs_creat(fs, filename, flags, 0644); + if (glfd == NULL) { + LOG_ERR("glfs_creat failed"); + goto out; + } + + ret = glfs_write(glfd, buff, strlen(buff), flags); + if (ret < 0) { + LOG_ERR("glfs_write failed"); + goto out; + } + + ret = glfs_close(glfd); + if (ret < 0) { + LOG_ERR("glfs_write failed"); + goto out; + } + +out: + ret = glfs_fini(fs); + if (ret) { + LOG_ERR("glfs_fini failed"); + } + + return ret; +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + char *hostname = NULL; + char *volname = NULL; + char *volfile = NULL; + char *logfile = NULL; + + if (argc != 5) { + fprintf(stderr, "Invalid argument\n"); + exit(1); + } + + hostname = argv[1]; + volname = argv[2]; + volfile = argv[3]; + logfile = argv[4]; + + ret = glfs_test_function(hostname, volname, volfile, logfile); + if (ret) { + LOG_ERR("glfs_test_function failed"); + } + + return ret; +} diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t new file mode 100755 index 00000000000..8e94df9d321 --- /dev/null +++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t @@ -0,0 +1,76 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../traps.rc +. $(dirname $0)/../../ssl.rc + +cleanup; + +sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \ + -e "s,@@SSL@@,off,g" \ + $(dirname ${0})/protocol-client-ssl.vol.in \ + > $(dirname ${0})/protocol-client-ssl.vol + +TEST create_self_signed_certs + +TEST glusterd + +TEST $CLI volume create $V0 $H0:$B0/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +logdir=`gluster --print-logdir` + +TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi + +# Run test without I/O or management encryption +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +# Enable management encryption +touch $GLUSTERD_WORKDIR/secure-access + +killall_gluster + +TEST glusterd +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +# Run test with management encryption (No I/O encryption) +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +# Enable I/O encryption +TEST $CLI volume set $V0 server.ssl on + +killall_gluster + +sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \ + -e "s,@@SSL@@,on,g" \ + $(dirname ${0})/protocol-client-ssl.vol.in \ + > $(dirname ${0})/protocol-client-ssl.vol + +TEST glusterd +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + +# Run test without I/O or management encryption +TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \ + $(dirname ${0})/protocol-client-ssl.vol \ + $logdir/gfapi-ssl-load-volfile-test.log + +cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup; + +# NetBSD build scripts are not up to date therefore this test +# is failing in NetBSD. Therefore skipping the test in NetBSD +# as of now. +#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in new file mode 100644 index 00000000000..cdc0c9d0671 --- /dev/null +++ b/tests/basic/gfapi/protocol-client-ssl.vol.in @@ -0,0 +1,15 @@ +# +# This .vol file expects that there is +# +# 1. GlusterD listening on @@HOSTNAME@@ +# 2. a volume that provides a brick on @@BRICKPATH@@ +# 3. the volume with the brick has been started +# +volume test + type protocol/client + option remote-host @@HOSTNAME@@ + option remote-subvolume @@BRICKPATH@@ + option transport-type socket + option transport.socket.ssl-enabled @@SSL@@ +end-volume + diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t index c208112c8b0..011535066f9 100644 --- a/tests/bugs/replicate/bug-1744548-heal-timeout.t +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0 TEST $CLI volume profile $V0 start TEST $CLI volume profile $V0 info clear TEST $CLI volume heal $V0 enable -# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes -EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count +# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode +EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count # Check that a change in heal-timeout is honoured immediately. TEST $CLI volume set $V0 cluster.heal-timeout 5 sleep 10 # Two crawls must have happened. -EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count # shd must not heal if it is disabled and heal-timeout is changed. TEST $CLI volume heal $V0 disable diff --git a/tests/features/trash.t b/tests/features/trash.t index 472e909e567..da5b50bc85a 100755 --- a/tests/features/trash.t +++ b/tests/features/trash.t @@ -94,105 +94,105 @@ wildcard_not_exists() { if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi } -# testing glusterd [1-3] +# testing glusterd TEST glusterd TEST pidof glusterd TEST $CLI volume info -# creating distributed volume [4] +# creating distributed volume TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} -# checking volume status [5-7] +# checking volume status EXPECT "$V0" volinfo_field $V0 'Volume Name' EXPECT 'Created' volinfo_field $V0 'Status' EXPECT '2' brick_count $V0 -# test without enabling trash translator [8] +# test without enabling trash translator TEST start_vol $V0 $M0 -# test on enabling trash translator [9-10] +# test on enabling trash translator TEST $CLI volume set $V0 features.trash on EXPECT 'on' volinfo_field $V0 'features.trash' -# files directly under mount point [11] +# files directly under mount point create_files $M0/file1 $M0/file2 TEST file_exists $V0 file1 file2 -# perform unlink [12] +# perform unlink TEST unlink_op file1 -# perform truncate [13] +# perform truncate TEST truncate_op file2 4 -# create files directory hierarchy and check [14] +# create files directory hierarchy and check mkdir -p $M0/1/2/3 create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 -# perform unlink [15] +# perform unlink TEST unlink_op 1/2/3/foo1 -# perform truncate [16] +# perform truncate TEST truncate_op 1/2/3/foo2 4 # create a directory for eliminate pattern mkdir $M0/a -# set the eliminate pattern [17-18] +# set the eliminate pattern TEST $CLI volume set $V0 features.trash-eliminate-path /a EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' -# create two files and check [19] +# create two files and check create_files $M0/a/test1 $M0/a/test2 TEST file_exists $V0 a/test1 a/test2 -# remove from eliminate pattern [20] +# remove from eliminate pattern rm -f $M0/a/test1 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* -# truncate from eliminate path [21-23] +# truncate from eliminate path truncate -s 2 $M0/a/test2 TEST [ -e $M0/a/test2 ] TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* -# set internal op on [24-25] +# set internal op on TEST $CLI volume set $V0 features.trash-internal-op on EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' -# again create two files and check [26] +# again create two files and check create_files $M0/inop1 $M0/inop2 TEST file_exists $V0 inop1 inop2 -# perform unlink [27] +# perform unlink TEST unlink_op inop1 -# perform truncate [28] +# perform truncate TEST truncate_op inop2 4 -# remove one brick and restart the volume [28-31] +# remove one brick and restart the volume TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 TEST start_vol $V0 $M0 $M0/.trashcan -# again create two files and check [33] +# again create two files and check create_files $M0/rebal1 $M0/rebal2 TEST file_exists $V0 rebal1 rebal2 -# add one brick [34-35] +# add one brick TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 TEST [ -d $B0/${V0}3 ] -# perform rebalance [36] +# perform rebalance TEST $CLI volume rebalance $V0 start force EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed #Find out which file was migrated to the new brick file_name=$(ls $B0/${V0}3/rebal*| xargs basename) -# check whether rebalance was succesful [37-40] +# check whether rebalance was succesful EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* @@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 # force required in case rebalance is not over TEST $CLI volume stop $V0 force -# create a replicated volume [41] +# create a replicated volume TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2} -# checking volume status [42-45] +# checking volume status EXPECT "$V1" volinfo_field $V1 'Volume Name' EXPECT 'Replicate' volinfo_field $V1 'Type' EXPECT 'Created' volinfo_field $V1 'Status' EXPECT '2' brick_count $V1 -# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] +# enable trash with options and start the replicate volume by disabling automatic self-heal TEST $CLI volume set $V1 features.trash on TEST $CLI volume set $V1 features.trash-internal-op on EXPECT 'on' volinfo_field $V1 'features.trash' EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' TEST start_vol $V1 $M1 $M1/.trashcan -# mount and check for trash directory [51] +# mount and check for trash directory TEST [ -d $M1/.trashcan/internal_op ] -# create a file and check [52] +# create a file and check touch $M1/self TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] -# kill one brick and delete the file from mount point [53-54] +# kill one brick and delete the file from mount point kill_brick $V1 $H0 $B0/${V1}1 EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count rm -f $M1/self EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* -# force start the volume and trigger the self-heal manually [55-57] -TEST $CLI volume start $V1 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -# Since we created the file under root of the volume, it will be -# healed automatically - -# check for the removed file in trashcan [58] -EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* - -# check renaming of trash directory through cli [59-62] +# check renaming of trash directory through cli TEST $CLI volume set $V0 trash-dir abc TEST start_vol $V0 $M0 $M0/abc TEST [ -e $M0/abc -a ! -e $M0/.trashcan ] EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* -# ensure that rename and delete operation on trash directory fails [63-65] +# ensure that rename and delete operation on trash directory fails rm -rf $M0/abc/internal_op TEST [ -e $M0/abc/internal_op ] rm -rf $M0/abc/ diff --git a/tests/volume.rc b/tests/volume.rc index 3924baeb7fc..b38848c0e52 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -999,4 +999,4 @@ function logging_time_check() local logfile=`echo ${0##*/}`_glusterd1.log cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l -}
\ No newline at end of file +} diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 4c8fa31b679..032ab5c8001 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) @@ -2271,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv, * need is a low probability that multiple clients * won't converge on the same subvolume. */ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -3978,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5660,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6653,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f69013f3e0a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f35c41df274..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dd40c57ab12..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7a038fa7fe3..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -369,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index f2e08908c24..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 67e0a4d10be..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -541,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -553,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -561,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1286,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e60708accf..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t *child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame); void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 42e143e3a5a..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2161,31 +2161,18 @@ static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } static int32_t @@ -4314,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy = gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4604,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -5893,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -6720,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -10855,15 +10818,12 @@ dht_notify(xlator_t *this, int event, void *data, ...) gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -11086,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. */ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -11239,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. It is valid only for for ops on files @@ -11295,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index adb362f7784..fe0dc3db34a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,7 +24,6 @@ #define _DHT_H #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" -#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" #define DHT_MDS_STR "mds" #define GF_DHT_LOOKUP_UNHASHED_OFF 0 @@ -36,7 +35,6 @@ #define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal" /* Namespace synchronization */ #define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync" -#define TIERING_MIGRATION_KEY "tiering.migration" #define DHT_LAYOUT_HASH_INVALID 1 #define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN) @@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem, dht_layout_t **ondisk); -typedef struct { - uint64_t blocks_used; - uint64_t pblocks_used; - uint64_t files_used; - uint64_t pfiles_used; - uint64_t unhashed_blocks_used; - uint64_t unhashed_pblocks_used; - uint64_t unhashed_files_used; - uint64_t unhashed_pfiles_used; - uint64_t unhashed_fsid; - uint64_t hashed_fsid; -} tier_statvfs_t; - struct dht_local { loc_t loc; loc_t loc2; @@ -272,7 +257,6 @@ struct dht_local { struct iatt preparent; struct iatt postparent; struct statvfs statvfs; - tier_statvfs_t tier_statvfs; fd_t *fd; inode_t *inode; dict_t *params; @@ -405,14 +389,7 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS = 1 + 2, GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, GF_DEFRAG_CMD_START_FORCE = 1 + 4, - GF_DEFRAG_CMD_START_TIER = 1 + 5, - GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, - GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, - GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, - GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11, - GF_DEFRAG_CMD_STOP_TIER = 1 + 12, GF_DEFRAG_CMD_DETACH_START = 1 + 13, GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14, GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15, @@ -463,75 +440,6 @@ struct dht_container { int local_subvol_index; }; -typedef enum tier_mode_ { - TIER_MODE_NONE = 0, - TIER_MODE_TEST, - TIER_MODE_WM -} tier_mode_t; - -typedef enum tier_pause_state_ { - TIER_RUNNING = 0, - TIER_REQUEST_PAUSE, - TIER_PAUSED -} tier_pause_state_t; - -/* This Structure is only used in tiering fixlayout */ -typedef struct gf_tier_fix_layout_arg { - xlator_t *this; - dict_t *fix_layout; - pthread_t thread_id; -} gf_tier_fix_layout_arg_t; - -typedef struct gf_tier_conf { - int is_tier; - int watermark_hi; - int watermark_low; - int watermark_last; - unsigned long block_size; - fsblkcnt_t blocks_total; - fsblkcnt_t blocks_used; - uint64_t max_migrate_bytes; - int max_migrate_files; - int query_limit; - tier_mode_t mode; - int percent_full; - /* These flags are only used for tier-compact */ - gf_boolean_t compact_active; - /* These 3 flags are set to true when the client changes the */ - /* compaction mode on the command line. */ - /* When they are set, the daemon will trigger compaction as */ - /* soon as possible to activate or deactivate compaction. */ - /* If in the middle of a compaction, then the switches take */ - /* effect on the next compaction, not the current one. */ - /* If the user switches it off, we want to avoid needless */ - /* compactions. */ - /* If the user switches it on, they want to compact as soon */ - /* as possible. */ - gf_boolean_t compact_mode_switched; - gf_boolean_t compact_mode_switched_hot; - gf_boolean_t compact_mode_switched_cold; - int tier_max_promote_size; - int tier_promote_frequency; - int tier_demote_frequency; - int tier_compact_hot_frequency; - int tier_compact_cold_frequency; - uint64_t st_last_promoted_size; - uint64_t st_last_demoted_size; - struct synctask *pause_synctask; - gf_timer_t *pause_timer; - pthread_mutex_t pause_mutex; - int promote_in_progress; - int demote_in_progress; - /* This Structure is only used in tiering fixlayout */ - gf_tier_fix_layout_arg_t tier_fix_layout_arg; - /* Indicates the index of the first queryfile picked - * in the last cycle of promote or demote */ - int32_t last_promote_qfile_index; - int32_t last_demote_qfile_index; - tier_pause_state_t pause_state; - char volname[GD_VOLUME_NAME_MAX + 1]; -} gf_tier_conf_t; - typedef struct nodeuuid_info { char info; /* Set to 1 is this is my node's uuid*/ uuid_t uuid; /* Store the nodeuuid as well for debugging*/ @@ -563,13 +471,6 @@ struct gf_defrag_info_ { uint32_t new_commit_hash; gf_defrag_status_t defrag_status; gf_defrag_pattern_list_t *defrag_pattern; - gf_tier_conf_t tier_conf; - - /*Data Tiering params for scanner*/ - uint64_t total_files_promoted; - uint64_t total_files_demoted; - int write_freq_threshold; - int read_freq_threshold; pthread_cond_t parallel_migration_cond; pthread_mutex_t dfq_mutex; @@ -605,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t; struct dht_methods_s { int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local); int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag); - int32_t (*migration_needed)(xlator_t *this); xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout, const char *name); }; @@ -1316,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout); int dht_refresh_layout(call_frame_t *frame); -gf_boolean_t -dht_is_tier_xlator(xlator_t *this); - int dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index eda2491e0ff..2f23ce90fbd 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1(postbuf)) { - if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new(); - if (!local->xattr_req) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32(local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "insufficient memory"); local->op_errno = ENOMEM; local->op_ret = -1; goto out; } } + ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + dht_iatt_merge(this, &local->stbuf, postbuf); dht_iatt_merge(this, &local->prebuf, prebuf); diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 33f9832395b..e3c4471334a 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -30,10 +30,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, - gf_tier_mt_bricklist_t, - gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, - gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, gf_dht_nodeuuids_t, gf_dht_mt_end diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 026879e14af..601f8dad78b 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -38,12 +38,11 @@ GLFS_MSGID( DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED, DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES, DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED, - DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR, - DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED, - DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED, - DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED, - DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO, - DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, + DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, + DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED, + DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED, + DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, + DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED, DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED, DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED, @@ -69,8 +68,7 @@ GLFS_MSGID( DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED, DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR, DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO, - DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR, - DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED, + DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED, DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED, DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED, DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED, @@ -96,15 +94,13 @@ GLFS_MSGID( DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED, DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL, DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED, - DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, - DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP, - DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED, - DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE, - DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED, - DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, - DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE, - DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, - DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR, + DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP, + DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, + DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED, + DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED, + DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, + DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED, + DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR, DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR, DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED, DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED, @@ -180,7 +176,6 @@ GLFS_MSGID( "adding bricks" #define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file" #define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory" -#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused" #define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr" #define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode" #define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination" @@ -222,17 +217,14 @@ GLFS_MSGID( #define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present" #define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed" #define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup" -#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed" #define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed" #define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping" #define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout" #define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed" #define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed" -#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started" #define DHT_MSG_FIX_NOT_COMP_STR \ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \ "complete" -#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr" #define DHT_MSG_SUBVOL_DETER_FAILED_STR \ "local subvolume determination failed with error" #define DHT_MSG_LOCAL_SUBVOL_STR "local subvol" @@ -248,8 +240,6 @@ GLFS_MSGID( #define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \ "Failed to initialise migration queue" #define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance" -#define DHT_MSG_TIER_RESUME_STR "Pause end. Resume tiering" -#define DHT_MSG_TIER_PAUSED_STR "Pause tiering" #define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout" #define DHT_MSG_WOKE_STR "woken" #define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance" diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index e901947d0ff..8ba8082bd86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -609,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - if (!!dht_is_tier_xlator(this)) { - xdata = dict_new(); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", - loc->path); - goto out; - } + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed)", loc->path); + goto out; + } - ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", loc->path, - GF_CLEAN_WRITE_PROTECTION); - goto out; - } + ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; } ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); @@ -1096,7 +1093,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, break; } - if (!conf->force_migration && !dht_is_tier_xlator(this)) { + if (!conf->force_migration) { if (!xdata) { xdata = dict_new(); if (!xdata) { @@ -1536,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* If defrag is NULL, it should be assumed that migration is triggered - * from client using the trusted.distribute.migrate-data virtual xattr - */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator(this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - gf_log(this->name, log_level, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -2300,14 +2282,12 @@ out: } } - if (!dht_is_tier_xlator(this)) { - lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, + NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } if (dict) @@ -3073,7 +3053,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i, dht_conf_t *conf, gf_defrag_info_t *defrag, fd_t *fd, dict_t *migrate_data, struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) + int *perrno) { int ret = 0; char is_linkfile = 0; @@ -3277,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int dfc_index = 0; int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; + xlator_t *old_THIS = NULL; gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); gettimeofday(&dir_start, NULL); @@ -3290,6 +3270,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } + old_THIS = THIS; + THIS = this; + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); if (!dir_dfmeta) { gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); @@ -3454,7 +3437,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, defrag, dir_dfmeta->lfd[dfc_index], migrate_data, dir_dfmeta, xattr_req, - &should_commit_hash, perrno); + perrno); if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { goto out; @@ -3505,16 +3488,12 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, loc->path, elapsed / 1e6); ret = 0; out: - + THIS = old_THIS; gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); if (xattr_req) dict_unref(xattr_req); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - /* It does not matter if it errored out - this number is * used to calculate rebalance estimated time to complete. * No locking required as dirs are processed by a single thread. @@ -3522,6 +3501,7 @@ out: defrag->num_dirs_processed++; return ret; } + int gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout) @@ -3536,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, * rebalance is complete. */ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || - defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { return 0; } @@ -3582,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, return 0; } -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. This is not performant, adding more time to - * fixlayout. The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ -int -gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, - gf_dirent_t *file_dentry) -{ - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - - GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); - goto out; - } - - conf = this->private; - - loc_wipe(&file_loc); - - if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null(parent_loc->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", - parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - - ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new(); - if (!lookup_xdata) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", file_dentry->d_name); - goto out; - } - - ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; - } - - gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, - lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; - -out: - - loc_wipe(&file_loc); - - if (lookup_xdata) - dict_unref(lookup_xdata); - - return ret; -} - int gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3709,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; - int should_commit_hash = 1; int perrno = 0; conf = this->private; @@ -3812,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; if (!IA_ISDIR(entry->d_stat.ia_type)) { - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. - * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup(this, loc, entry); - } - continue; } loc_wipe(&entry_loc); @@ -3838,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } else { - should_commit_hash = 0; - continue; } } @@ -3902,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } else { - should_commit_hash = 0; continue; } } @@ -3920,7 +3777,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - if (ret && ret != 2) { + if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, "Fix layout failed for %s", entry_loc.path); @@ -3987,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - if (ret && (ret != 2)) { + if (ret) { if (perrno == ENOENT || perrno == ESTALE) { ret = 0; goto out; @@ -4007,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (conf->decommission_in_progress) { goto out; } - - should_commit_hash = 0; } - } else if (ret == 2) { - should_commit_hash = 0; } } gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { defrag->total_failures++; gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, @@ -4042,10 +3893,6 @@ out: if (fd) fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - return ret; } @@ -4054,31 +3901,26 @@ dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; uuid_t *uuid_ptr = NULL; int ret = -1; int i = 0; int j = 0; - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, - "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL, + NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } + if (!ret) + goto out; + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, NULL, NULL); if (ret) { @@ -4552,14 +4394,13 @@ gf_defrag_start_crawl(void *data) } ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data); - if (ret && ret != 2) { + if (ret) { defrag->total_failures++; ret = -1; goto out; } - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { defrag->total_failures++; ret = -1; goto out; @@ -4731,8 +4572,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; char *status = ""; double elapsed = 0; uint64_t time_to_complete = 0; @@ -4751,15 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; elapsed = gf_time() - defrag->start_time; /* The rebalance is still in progress */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { time_to_complete = gf_defrag_get_estimates_based_on_size(conf); if (time_to_complete && (time_to_complete > elapsed)) @@ -4774,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) if (!dict) goto log; - ret = dict_set_uint64(dict, "promoted", promoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); - - ret = dict_set_uint64(dict, "demoted", demoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); - ret = dict_set_uint64(dict, "files", files); if (ret) gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 8efa3b85b22..3e24065227c 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, local->call_cnt = conf->subvolume_cnt; if (op_ret < 0) { - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. - */ if (op_errno == EINVAL) { local->call_cnt = 1; dht_selfheal_dir_mkdir_lookup_done(frame, this); diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 96caf40d0b1..bb72b0ffbb5 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, pattern_str = strtok_r(data, ",", &tmp_str); while (pattern_str) { dup_str = gf_strdup(pattern_str); + if (!dup_str) + goto out; pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); if (!pattern_list) { goto out; @@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this) methods = &(conf->methods); methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; methods->migration_other = NULL; methods->layout_search = dht_layout_search; @@ -1045,84 +1046,6 @@ struct volume_options dht_options[] = { /* NUFA option */ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, - /* tier options */ - { - .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { - .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { - .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { - .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - - { - .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "90", - }, - { - .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { - .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { - .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - {.key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system"}, - {.key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system"}, - { - .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { - .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { - .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, /* switch option */ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 59313639c45..3648a564840 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -595,7 +595,6 @@ nufa_init(xlator_t *this) dht_methods_t dht_methods = { .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, .layout_search = dht_layout_search, }; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 8d2b7f051da..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, - BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED); + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); #define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" #define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" @@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, "daemon. Unwinding the fop" #define BRS_MSG_SIGN_PREPARE_FAIL_STR \ "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. Unwinding the fop" #define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" #define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" #define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 605a5e4c3e4..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int @@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - -dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -1025,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index cda02dadc2c..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, char *save_ptr = NULL; tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + strtok_r(tmp_key, ":", &save_ptr); if (!*save_ptr) { if (tmp_key) diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index de932826c90..d94dceb10b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str) while (ptr) { *ptr = '-'; - ptr = strchr(str, '/'); + ptr = strchr(ptr, '/'); } } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index d0e72e0874f..90ef2cf4c9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2220,7 +2220,10 @@ retry: if (wait) { synclock_unlock(&priv->big_lock); + errno = 0; ret = runner_run(&runner); + if (errno != 0) + ret = errno; synclock_lock(&priv->big_lock); if (ret == EADDRINUSE) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 4e5712e6447..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3810,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3851,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f74876eec9f..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, }, + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 1cf55143e24..0e22fe411ee 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5899,7 +5899,9 @@ fuse_graph_sync(xlator_t *this) new_graph_id = priv->next_graph->id; priv->next_graph = NULL; need_first_lookup = 1; - priv->handle_graph_switch = _gf_true; + if (old_subvol) { + priv->handle_graph_switch = _gf_true; + } while (!priv->event_recvd) { ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex); @@ -5935,13 +5937,6 @@ unlock: if (winds_on_old_subvol == 0) { xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL); } - } else { - pthread_mutex_lock(&priv->sync_mutex); - { - priv->handle_graph_switch = _gf_false; - pthread_cond_broadcast(&priv->migrate_cond); - } - pthread_mutex_unlock(&priv->sync_mutex); } return 0; |