summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--.testignore2
-rw-r--r--CONTRIBUTING30
-rw-r--r--CONTRIBUTING.md114
-rw-r--r--MAINTAINERS26
-rw-r--r--Makefile.am5
-rw-r--r--README.md3
-rw-r--r--api/src/glfs-fops.c70
-rw-r--r--api/src/glfs-mgmt.c5
-rw-r--r--api/src/glfs.c10
-rw-r--r--configure.ac55
-rw-r--r--contrib/fuse-lib/mount.c1
-rw-r--r--doc/developer-guide/identifying-resource-leaks.md24
-rw-r--r--events/src/eventsapiconf.py.in2
-rw-r--r--events/src/glustereventsd.py37
-rw-r--r--events/src/utils.py3
-rwxr-xr-xextras/distributed-testing/distributed-test-runner.py12
-rw-r--r--extras/ganesha/ocf/ganesha_nfsd2
-rw-r--r--extras/ganesha/scripts/ganesha-ha.sh2
-rwxr-xr-xextras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh27
-rwxr-xr-xextras/hook-scripts/start/post/S31ganesha-start.sh2
-rwxr-xr-xextras/quota/quota_fsck.py8
-rwxr-xr-xextras/snap_scheduler/gcron.py4
-rwxr-xr-xextras/snap_scheduler/snap_scheduler.py2
-rw-r--r--geo-replication/gsyncd.conf.in2
-rw-r--r--geo-replication/syncdaemon/syncdutils.py2
-rw-r--r--glusterfs.spec.in3
-rw-r--r--glusterfsd/src/Makefile.am1
-rw-r--r--glusterfsd/src/gf_attach.c4
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c4
-rw-r--r--glusterfsd/src/glusterfsd.c8
-rw-r--r--heal/src/glfs-heal.c4
-rw-r--r--libglusterfs/src/Makefile.am3
-rw-r--r--libglusterfs/src/common-utils.c47
-rw-r--r--libglusterfs/src/ctx.c8
-rw-r--r--libglusterfs/src/dict.c65
-rw-r--r--libglusterfs/src/event.c3
-rw-r--r--libglusterfs/src/events.c29
-rw-r--r--libglusterfs/src/gidcache.c5
-rw-r--r--libglusterfs/src/glusterfs/common-utils.h40
-rw-r--r--libglusterfs/src/glusterfs/dict.h6
-rw-r--r--libglusterfs/src/glusterfs/glusterfs.h12
-rw-r--r--libglusterfs/src/glusterfs/latency.h22
-rw-r--r--libglusterfs/src/glusterfs/mem-pool.h31
-rw-r--r--libglusterfs/src/glusterfs/mem-types.h1
-rw-r--r--libglusterfs/src/glusterfs/stack.h14
-rw-r--r--libglusterfs/src/glusterfs/statedump.h2
-rw-r--r--libglusterfs/src/glusterfs/store.h2
-rw-r--r--libglusterfs/src/glusterfs/syscall.h9
-rw-r--r--libglusterfs/src/glusterfs/xlator.h2
-rw-r--r--libglusterfs/src/latency.c93
-rw-r--r--libglusterfs/src/libglusterfs.sym10
-rw-r--r--libglusterfs/src/mem-pool.c113
-rw-r--r--libglusterfs/src/monitoring.c8
-rw-r--r--libglusterfs/src/statedump.c147
-rw-r--r--libglusterfs/src/store.c12
-rw-r--r--libglusterfs/src/syncop.c2
-rw-r--r--libglusterfs/src/syscall.c24
-rw-r--r--libglusterfs/src/xlator.c7
-rwxr-xr-xrfc.sh68
-rw-r--r--rpc/rpc-lib/src/libgfrpc.sym1
-rw-r--r--rpc/rpc-lib/src/protocol-common.h1
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.c2
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c5
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c78
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h5
-rw-r--r--rpc/rpc-transport/socket/src/socket.c7
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x15
-rw-r--r--rpc/xdr/src/libgfxdr.sym2
-rwxr-xr-xrun-tests.sh32
-rw-r--r--tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t (renamed from tests/basic/afr/split-brain-favorite-child-policy.t)4
-rw-r--r--tests/000-flaky/basic_changelog_changelog-snapshot.t (renamed from tests/basic/changelog/changelog-snapshot.t)4
-rw-r--r--tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t (renamed from tests/basic/distribute/rebal-all-nodes-migrate.t)8
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t (renamed from tests/basic/ec/ec-quorum-count-partial-failure.t)4
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/basic_mount-nfs-auth.t (renamed from tests/basic/mount-nfs-auth.t)0
-rw-r--r--tests/000-flaky/bugs_core_multiplex-limit-issue-151.t (renamed from tests/bugs/core/multiplex-limit-issue-151.t)6
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/bugs_distribute_bug-1117851.t (renamed from tests/bugs/distribute/bug-1117851.t)4
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1122443.t (renamed from tests/bugs/distribute/bug-1122443.t)17
-rw-r--r--tests/000-flaky/bugs_glusterd_bug-857330/common.rc (renamed from tests/bugs/glusterd/bug-857330/common.rc)2
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/normal.t (renamed from tests/bugs/glusterd/bug-857330/normal.t)4
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/xml.t (renamed from tests/bugs/glusterd/bug-857330/xml.t)4
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/bugs_glusterd_quorum-value-check.t (renamed from tests/bugs/glusterd/quorum-value-check.t)4
-rw-r--r--tests/000-flaky/bugs_nfs_bug-1116503.t (renamed from tests/bugs/nfs/bug-1116503.t)6
-rw-r--r--tests/000-flaky/features_lock-migration_lkmigration-set-option.t (renamed from tests/features/lock-migration/lkmigration-set-option.t)4
-rw-r--r--tests/afr.rc6
-rw-r--r--tests/basic/afr/afr-anon-inode-no-quorum.t63
-rw-r--r--tests/basic/afr/afr-anon-inode.t114
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t459
-rw-r--r--tests/basic/afr/rename-data-loss.t72
-rw-r--r--tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t124
-rw-r--r--tests/basic/fuse/active-io-graph-switch.t65
-rw-r--r--tests/basic/gfapi/gfapi-ssl-load-volfile-test.c127
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-load-volfile-test.t76
-rw-r--r--tests/basic/gfapi/protocol-client-ssl.vol.in15
-rw-r--r--tests/basic/metadisp/fsyncdir.c29
-rw-r--r--tests/basic/metadisp/ftruncate.c34
-rw-r--r--tests/basic/metadisp/fxattr.c107
-rw-r--r--tests/basic/metadisp/gfs-fsetxattr.c141
-rw-r--r--tests/basic/metadisp/metadisp.t316
-rw-r--r--tests/basic/metadisp/metadisp.vol14
-rwxr-xr-xtests/bugs/fuse/many-groups-for-acl.t13
-rw-r--r--tests/bugs/glusterd/brick-order-check-add-brick.t21
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t6
-rw-r--r--tests/bugs/shard/issue-1425.t45
-rwxr-xr-xtests/features/trash.t74
-rw-r--r--tests/volume.rc2
-rw-r--r--xlators/cluster/afr/src/afr-common.c96
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c12
-rw-r--r--xlators/cluster/afr/src/afr-open.c8
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c10
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c182
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c205
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c33
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h5
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c184
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h2
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c95
-rw-r--r--xlators/cluster/afr/src/afr.c41
-rw-r--r--xlators/cluster/afr/src/afr.h17
-rw-r--r--xlators/cluster/dht/src/dht-common.c228
-rw-r--r--xlators/cluster/dht/src/dht-common.h118
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c12
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c32
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h3
-rw-r--r--xlators/cluster/dht/src/dht-messages.h36
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c706
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c49
-rw-r--r--xlators/cluster/dht/src/dht-shared.c89
-rw-r--r--xlators/cluster/dht/src/nufa.c1
-rw-r--r--xlators/cluster/ec/src/ec-common.c76
-rw-r--r--xlators/cluster/ec/src/ec-heal.c15
-rw-r--r--xlators/cluster/ec/src/ec-heald.c51
-rw-r--r--xlators/cluster/ec/src/ec-types.h5
-rw-r--r--xlators/cluster/ec/src/ec.c20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c4
-rw-r--r--xlators/debug/io-stats/src/io-stats.c109
-rw-r--r--xlators/features/Makefile.am6
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c12
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c52
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h5
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c68
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c52
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h21
-rw-r--r--xlators/features/changelog/src/changelog-messages.h8
-rw-r--r--xlators/features/changelog/src/changelog.c33
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c2
-rw-r--r--xlators/features/index/src/index.c4
-rw-r--r--xlators/features/leases/src/leases-internal.c4
-rw-r--r--xlators/features/locks/src/common.c6
-rw-r--r--xlators/features/locks/src/entrylk.c12
-rw-r--r--xlators/features/locks/src/inodelk.c12
-rw-r--r--xlators/features/locks/src/locks.h17
-rw-r--r--xlators/features/locks/src/posix.c25
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/quota/src/quota.c59
-rw-r--r--xlators/features/quota/src/quota.h5
-rw-r--r--xlators/features/read-only/src/worm-helper.c15
-rw-r--r--xlators/features/shard/src/shard.c46
-rw-r--r--xlators/features/trash/src/trash.c2
-rw-r--r--xlators/features/upcall/src/upcall-internal.c10
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c129
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c58
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c11
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c16
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c139
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c323
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c18
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c11
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c70
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c19
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c213
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h5
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c13
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c71
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in9
-rw-r--r--xlators/nfs/server/src/acl3.c3
-rw-r--r--xlators/nfs/server/src/auth-cache.c4
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c2
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c4
-rw-r--r--xlators/nfs/server/src/nfs3.c2
-rw-r--r--xlators/nfs/server/src/nlm4.c5
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c5
-rw-r--r--xlators/performance/io-cache/src/io-cache.c34
-rw-r--r--xlators/performance/io-cache/src/io-cache.h27
-rw-r--r--xlators/performance/io-cache/src/page.c6
-rw-r--r--xlators/performance/io-threads/src/io-threads.c11
-rw-r--r--xlators/performance/md-cache/src/md-cache.c72
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c8
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c6
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h2
-rw-r--r--xlators/performance/open-behind/src/open-behind.c66
-rw-r--r--xlators/performance/quick-read/src/quick-read.c36
-rw-r--r--xlators/performance/quick-read/src/quick-read.h2
-rw-r--r--xlators/protocol/server/src/server.c2
-rw-r--r--xlators/storage/posix/src/posix-common.c38
-rw-r--r--xlators/storage/posix/src/posix-helpers.c118
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c33
-rw-r--r--xlators/storage/posix/src/posix.h13
228 files changed, 6827 insertions, 2651 deletions
diff --git a/.gitignore b/.gitignore
index 849a0f1a8be..fc5ba586f8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,3 +121,5 @@ xlators/features/cloudsync/src/cloudsync-autogen-fops.c
xlators/features/cloudsync/src/cloudsync-autogen-fops.h
xlators/features/utime/src/utime-autogen-fops.c
xlators/features/utime/src/utime-autogen-fops.h
+tests/basic/metadisp/ftruncate
+xlators/features/metadisp/src/fops.c
diff --git a/.testignore b/.testignore
index 1784aff629a..fe8f838bf2b 100644
--- a/.testignore
+++ b/.testignore
@@ -8,7 +8,7 @@
rfc.sh
submit-for-review.sh
AUTHORS
-CONTRIBUTING
+CONTRIBUTING.md
COPYING-GPLV2
COPYING-LGPLV3
ChangeLog
diff --git a/CONTRIBUTING b/CONTRIBUTING
deleted file mode 100644
index eb30d76d1b4..00000000000
--- a/CONTRIBUTING
+++ /dev/null
@@ -1,30 +0,0 @@
-# GlusterFS project Contribution guidelines
-
-
-## By contributing to this project, the contributor would need to agree to below.
-
-### Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
-(c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
-(d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000000..65fc3497104
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# GlusterFS project Contribution guidelines
+
+## Development Workflow
+
+We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here.
+
+
+#### Get the Repository setup
+
+0. Fork Repository
+ - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork).
+
+1. Clone Repository
+ - Clone the glusterfs repo freshly from github using below steps.
+
+```
+ git clone git@github.com:${username}/glusterfs.git
+ cd glusterfs/
+ git remote add upstream git@github.com:gluster/glusterfs.git
+```
+
+About two tasks are one time for the life time. You can continue to use the same repository for all the work in future.
+
+#### Development & Other flows
+
+0. Issue:
+ - Make sure there is an issue filed for the task you are working on.
+ - If it is not filed, open the issue with all the description.
+ - If it is a bug fix, add label "Type:Bug".
+ - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label.
+
+1. Code:
+ - Start coding
+ - Build and test locally
+ - Make sure clang-format is installed and is run on the patch.
+
+2. Keep up-to-date
+ - GlusterFS is a large project with many developers, so there would be one or the other patch everyday.
+ - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened.
+ - Git provides many options to keep up-to-date, below is one of them
+```
+ git fetch upstream
+ git rebase upstream/devel
+```
+ - It is recommended you keep pushing to your repo every day, so you don't loose any work.
+ - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`)
+
+2. Commit Message / PR description:
+ - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice.
+ - PRs continue to have the title of format "component: \<title\>", like it is practiced now.
+ - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS.
+ - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message.
+ - Here, NNNN is the Issue ID in glusterfs repository.
+ - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line.
+ - Can do this by `-s` option for `git commit`.
+ - If the PR is not ready for review, apply the label `work-in-progress`.
+ - Check the availability of "Draft PR" is present for you, if yes, use that instead.
+
+3. Tests:
+ - All the required smoke tests would be auto-triggered.
+ - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment.
+ - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group.
+ - Ask for help as comment in PR if you have any questions about the process!
+
+4. Review Process:
+ - `+2` : is equivalent to "Approve" from the people in the maintainer's group.
+ - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment.
+ - `-1` : provide details on required changes and pick "Request Changes" while submitting your review.
+ - `-2` : done by adding the `DO-NOT-MERGE` label.
+
+ - Any further discussions can happen as comments in the PR.
+
+5. Making changes:
+ - There are 2 approaches to submit changes done after addressing review comments.
+ - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN)
+ - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option.
+
+6. Merging:
+ - GlusterFS project follows 'Squash and Merge' method
+ - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link.
+ - This also makes every merge a complete patch, which has passed all tests.
+ - The merging of the patch is expected to be done by the maintainers.
+ - It can be done when all the tests (smoke and regression) pass.
+ - When the PR has 'Approved' flag from corresponding maintainer.
+ - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email.
+
+## By contributing to this project, the contributor would need to agree to below.
+
+### Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
diff --git a/MAINTAINERS b/MAINTAINERS
index cfe075fa17d..953e8755fd9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -53,7 +53,7 @@ General Project Architects
--------------------------
M: Amar Tumballi <amarts@gmail.com>
M: Xavier Hernandez <xhernandez@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Atin Mukherjee <amukherj@redhat.com>
xlators:
@@ -66,12 +66,12 @@ F: xlators/system/posix-acl/
Arbiter
M: Ravishankar N <ravishankar@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
S: Maintained
F: xlators/features/arbiter/
Automatic File Replication (AFR)
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
M: Ravishankar N <ravishankar@redhat.com>
P: Karthik US <ksubrahm@redhat.com>
S: Maintained
@@ -101,7 +101,7 @@ S: Maintained
F: xlators/cluster/dht/
Erasure Coding
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
M: Xavier Hernandez <xhernandez@redhat.com>
P: Ashish Pandey <aspandey@redhat.com>
S: Maintained
@@ -119,13 +119,13 @@ S: Maintained
F: xlators/mount/
Index
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Ravishankar N <ravishankar@redhat.com>
S: Maintained
F: xlators/features/index/
IO Cache
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/performance/io-cache/
@@ -136,7 +136,7 @@ S: Maintained
F: xlators/debug/io-stats/
IO threads
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Ravishankar N <ravishankar@redhat.com>
S: Maintained
F: xlators/performance/io-threads/
@@ -160,7 +160,7 @@ S: Maintained
F: xlators/features/marker/
Meta
-M: Mohammed Rafi KC <rkavunga@redhat.com>
+M: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/features/meta/
@@ -172,7 +172,7 @@ F: xlators/performance/md-cache/
Negative-lookup Cache
M: Poornima G <pgurusid@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
S: Maintained
F: xlators/performance/nl-cache/
@@ -282,7 +282,7 @@ M: Xavier Hernandez <xhernandez@redhat.com>
M: Jeff Darcy <jeff@pl.atyp.us>
P: Kaleb Keithley <kkeithle@redhat.com>
P: Niels de Vos <ndevos@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Shyamsundar Ranganathan <srangana@redhat.com>
S: Maintained
F: libglusterfs/
@@ -305,7 +305,7 @@ F: xlators/mgmt/glusterd/
Protocol
M: Niels de Vos <ndevos@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/protocol/
@@ -317,7 +317,7 @@ F: rpc/xdr/
Snapshot
M: Raghavendra Bhat <raghavendra@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
P: Sunny Kumar <sunkumar@redhat.com>
S: Maintained
F: xlators/mgmt/glusterd/src/glusterd-snap*
@@ -326,7 +326,7 @@ F: extras/snap-scheduler.py
Socket subsystem
P: Krutika Dhananjay <kdhananj@redhat.com>
P: Milind Changire <mchangir@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
P: Mohit Agrawal <moagrawa@redhat.com>
S: Maintained
F: rpc/rpc-transport/socket/
diff --git a/Makefile.am b/Makefile.am
index aa0ca5c2a04..98ea5c1038d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,9 +19,13 @@ pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
+clean-local:
+ find . -name '*.o' -o -name '*.lo' -o -name '.Po' | xargs rm -f
+
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
find . -name mount.glusterfs -exec rm -f {} \;
+ find . -name .deps -o -name .libs | xargs rm -rf
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
@@ -49,4 +53,3 @@ gen-VERSION:
./build-aux/pkg-version --full \
> $(abs_top_builddir)/$(distdir)/VERSION; \
fi
-
diff --git a/README.md b/README.md
index 92f829e431e..9d68e033782 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,7 @@
petabytes. It provides interfaces for object, block and file storage.
## Development
- Contributions to gluster in the form of patches and new feature additions can
- be made by following steps outlined at [Developers Guide](http://docs.gluster.org/en/latest/Developer-guide/Developers-Index/#contributing-to-the-gluster-community).
+ The development workflow is documented in [Contributors guide](CONTRIBUTING.md)
## Documentation
The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org).
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index fc2cfa04309..6aa3c5602d1 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -1100,6 +1100,11 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -1151,6 +1156,11 @@ pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = pub_glfs_preadv(glfd, iov, count, glfd->offset, flags);
return ret;
@@ -1387,6 +1397,11 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -1406,6 +1421,11 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -1460,6 +1480,11 @@ pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
_gf_true, (void *)fn, data);
return ret;
@@ -1472,6 +1497,11 @@ pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
_gf_false, fn, data);
return ret;
@@ -1733,6 +1763,11 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -1748,6 +1783,11 @@ pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = pub_glfs_pwritev(glfd, iov, count, glfd->offset, flags);
return ret;
@@ -1938,6 +1978,11 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -1957,6 +2002,11 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -2011,6 +2061,11 @@ pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
_gf_true, (void *)fn, data);
return ret;
@@ -2023,6 +2078,11 @@ pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
_gf_false, fn, data);
return ret;
@@ -3334,6 +3394,11 @@ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0)
long
pub_glfs_telldir(struct glfs_fd *fd)
{
+ if (fd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
return fd->offset;
}
@@ -3344,6 +3409,11 @@ pub_glfs_seekdir(struct glfs_fd *fd, long offset)
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
+ if (fd == NULL) {
+ errno = EBADF;
+ return;
+ }
+
if (fd->offset == offset)
return;
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
index 08df2b1a4b7..7c82b8cd162 100644
--- a/api/src/glfs-mgmt.c
+++ b/api/src/glfs-mgmt.c
@@ -1013,11 +1013,6 @@ glfs_mgmt_init(struct glfs *fs)
if (ret)
goto out;
- if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
- ctx->secure_mgmt = 1;
- ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
- }
-
rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
if (!rpc) {
ret = -1;
diff --git a/api/src/glfs.c b/api/src/glfs.c
index 5259356b4c5..b4bf1423f6d 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -251,6 +251,11 @@ glfs_volumes_init(struct glfs *fs)
if (!vol_assigned(cmd_args))
return -1;
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
if (cmd_args->volfile_server) {
ret = glfs_mgmt_init(fs);
goto out;
@@ -650,6 +655,11 @@ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0)
struct glfs *
pub_glfs_from_glfd(struct glfs_fd *glfd)
{
+ if (glfd == NULL) {
+ errno = EBADF;
+ return NULL;
+ }
+
return glfd->fs;
}
diff --git a/configure.ac b/configure.ac
index 1725301d524..e2d6fd66cec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -164,6 +164,8 @@ AC_CONFIG_FILES([Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile
+ xlators/features/metadisp/Makefile
+ xlators/features/metadisp/src/Makefile
xlators/playground/Makefile
xlators/playground/template/Makefile
xlators/playground/template/src/Makefile
@@ -273,7 +275,7 @@ AC_ARG_ENABLE([debug],
[Enable debug build options.]))
if test "x$enable_debug" = "xyes"; then
BUILD_DEBUG=yes
- GF_CFLAGS="${GF_CFLAGS} -g -rdynamic -O0 -DDEBUG"
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
else
BUILD_DEBUG=no
fi
@@ -407,12 +409,27 @@ esac
# --enable-valgrind prevents calling dlclose(), this leaks memory
AC_ARG_ENABLE([valgrind],
- AC_HELP_STRING([--enable-valgrind],
- [Enable valgrind for resource leak debugging.]))
-if test "x$enable_valgrind" = "xyes"; then
- AC_DEFINE(RUN_WITH_VALGRIND, 1, [define if all processes should run under valgrind])
-fi
-
+ AC_HELP_STRING([--enable-valgrind@<:@=memcheck,drd@:>@],
+ [Enable valgrind for resource leak (memcheck, which is
+ the default) or thread synchronization (drd) debugging.]))
+case x$enable_valgrind in
+ xmemcheck|xyes)
+ AC_DEFINE(RUN_WITH_MEMCHECK, 1,
+ [Define if all processes should run under 'valgrind --tool=memcheck'.])
+ VALGRIND_TOOL=memcheck
+ ;;
+ xdrd)
+ AC_DEFINE(RUN_WITH_DRD, 1,
+ [Define if all processes should run under 'valgrind --tool=drd'.])
+ VALGRIND_TOOL=drd
+ ;;
+ x|xno)
+ VALGRIND_TOOL=no
+ ;;
+ *)
+ AC_MSG_ERROR([Please specify --enable-valgrind@<:@=memcheck,drd@:>@])
+ ;;
+esac
AC_ARG_WITH([previous-options],
[AS_HELP_STRING([--with-previous-options],
@@ -810,6 +827,17 @@ fi
AC_SUBST(GEOREP_EXTRAS_SUBDIR)
AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno")
+# METADISP section
+AC_ARG_ENABLE([metadisp],
+ AC_HELP_STRING([--enable-metadisp],
+ [Enable the metadata dispersal xlator]))
+BUILD_METADISP=no
+if test "x${enable_metadisp}" = "xyes"; then
+ BUILD_METADISP=yes
+fi
+AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"])
+# end METADISP section
+
# Events section
AC_ARG_ENABLE([events],
AC_HELP_STRING([--disable-events],
@@ -1562,9 +1590,9 @@ case $host_os in
;;
esac
dnl GF_XLATOR_DEFAULT_LDFLAGS is for most xlators that expose a common set of symbols
-GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED)'
+GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
dnl GF_XLATOR_LDFLAGS is for xlators that expose extra symbols, e.g. dht
-GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED)'
+GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
AC_SUBST(GF_HOST_OS)
AC_SUBST(GF_CFLAGS)
@@ -1579,6 +1607,13 @@ AC_SUBST(AM_LIBTOOLFLAGS)
AC_SUBST(GF_NO_UNDEFINED)
AC_SUBST(GF_XLATOR_DEFAULT_LDFLAGS)
AC_SUBST(GF_XLATOR_LDFLAGS)
+AC_SUBST(GF_XLATOR_MGNT_LIBADD)
+
+case $host_os in
+ *freebsd*)
+ GF_XLATOR_MGNT_LIBADD="-lutil -lprocstat"
+ ;;
+esac
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
@@ -1650,6 +1685,7 @@ echo "readline : $BUILD_READLINE"
echo "georeplication : $BUILD_SYNCDAEMON"
echo "Linux-AIO : $BUILD_LIBAIO"
echo "Enable Debug : $BUILD_DEBUG"
+echo "Run with Valgrind : $VALGRIND_TOOL"
echo "Sanitizer enabled : $SANITIZER"
echo "Use syslog : $USE_SYSLOG"
echo "XML output : $BUILD_XML_OUTPUT"
@@ -1668,6 +1704,7 @@ echo "IPV6 default : $with_ipv6_default"
echo "Use TIRPC : $with_libtirpc"
echo "With Python : ${PYTHON_VERSION}"
echo "Cloudsync : $BUILD_CLOUDSYNC"
+echo "Metadata dispersal : $BUILD_METADISP"
echo "Link with TCMALLOC : $BUILD_TCMALLOC"
echo
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index 351972ce3cf..06ff191f542 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -390,6 +390,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname,
build_iovec (&iov, &iovlen, "from", "/dev/fuse", -1);
build_iovec (&iov, &iovlen, "volname", source, -1);
build_iovec (&iov, &iovlen, "fd", fdstr, -1);
+ build_iovec (&iov, &iovlen, "allow_other", NULL, -1);
ret = nmount (iov, iovlen, mountflags);
#else
ret = mount (source, mountpoint, fstype, mountflags,
diff --git a/doc/developer-guide/identifying-resource-leaks.md b/doc/developer-guide/identifying-resource-leaks.md
index 851fc4424bc..950cae79b0a 100644
--- a/doc/developer-guide/identifying-resource-leaks.md
+++ b/doc/developer-guide/identifying-resource-leaks.md
@@ -174,3 +174,27 @@ In this case, the resource leak can be addressed by adding a single line to the
Running the same Valgrind command and comparing the output will show that the
memory leak in `xlators/meta/src/meta.c:init` is not reported anymore.
+
+### Running DRD, the Valgrind thread error detector
+
+When configuring GlusterFS with:
+
+```shell
+./configure --enable-valgrind
+```
+
+the default Valgrind tool (Memcheck) is enabled. But it's also possble to select
+one of Memcheck or DRD by using:
+
+```shell
+./configure --enable-valgrind=memcheck
+```
+
+or:
+
+```shell
+./configure --enable-valgrind=drd
+```
+
+respectively. When using DRD, it's recommended to consult
+https://valgrind.org/docs/manual/drd-manual.html before running.
diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
index 76b5954d325..700093bee60 100644
--- a/events/src/eventsapiconf.py.in
+++ b/events/src/eventsapiconf.py.in
@@ -28,6 +28,8 @@ def get_glusterd_workdir():
return glusterd_workdir
SERVER_ADDRESS = "0.0.0.0"
+SERVER_ADDRESSv4 = "0.0.0.0"
+SERVER_ADDRESSv6 = "::1"
DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json"
CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json"
CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC
diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py
index c4c7b65e332..341a3b60947 100644
--- a/events/src/glustereventsd.py
+++ b/events/src/glustereventsd.py
@@ -13,6 +13,7 @@
from __future__ import print_function
import sys
import signal
+import threading
try:
import socketserver
except ImportError:
@@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
from eventtypes import all_events
import handlers
import utils
-from eventsapiconf import SERVER_ADDRESS, PID_FILE
+from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE
from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES
from utils import logger, PidFile, PidFileLockFailed, boolify
+# Subclass so that specifically IPv4 packets are captured
+class UDPServerv4(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET
+
+# Subclass so that specifically IPv6 packets are captured
+class UDPServerv6(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET6
class GlusterEventsRequestHandler(socketserver.BaseRequestHandler):
@@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame):
utils.restart_webhook_pool()
+def UDP_server_thread(sock):
+ sock.serve_forever()
+
+
def init_event_server():
utils.setup_logger()
utils.load_all()
@@ -99,15 +111,26 @@ def init_event_server():
sys.stderr.write("Unable to get Port details from Config\n")
sys.exit(1)
- # Start the Eventing Server, UDP Server
+ # Creating the Eventing Server, UDP Server for IPv4 packets
+ try:
+ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e))
+ sys.exit(1)
+ # Creating the Eventing Server, UDP Server for IPv6 packets
try:
- server = socketserver.ThreadingUDPServer(
- (SERVER_ADDRESS, port),
- GlusterEventsRequestHandler)
+ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port),
+ GlusterEventsRequestHandler)
except socket.error as e:
- sys.stderr.write("Failed to start Eventsd: {0}\n".format(e))
+ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e))
sys.exit(1)
- server.serve_forever()
+ server_thread1 = threading.Thread(target=UDP_server_thread,
+ args=(serverv4,))
+ server_thread2 = threading.Thread(target=UDP_server_thread,
+ args=(serverv6,))
+ server_thread1.start()
+ server_thread2.start()
def get_args():
diff --git a/events/src/utils.py b/events/src/utils.py
index 38b707a1b28..6d4e0791a2b 100644
--- a/events/src/utils.py
+++ b/events/src/utils.py
@@ -13,6 +13,7 @@ import sys
import json
import os
import logging
+import logging.handlers
import fcntl
from errno import EBADF
from threading import Thread
@@ -98,7 +99,7 @@ def setup_logger():
logger.setLevel(logging.INFO)
# create the logging file handler
- fh = logging.FileHandler(LOG_FILE)
+ fh = logging.handlers.WatchedFileHandler(LOG_FILE)
formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
"[%(module)s - %(lineno)s:%(funcName)s] "
diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py
index 7bfb6c9652a..5a07e2feab1 100755
--- a/extras/distributed-testing/distributed-test-runner.py
+++ b/extras/distributed-testing/distributed-test-runner.py
@@ -383,14 +383,17 @@ class Handlers:
return self.shell.call("make install") == 0
@synchronized
- def prove(self, id, test, timeout, valgrind=False, asan_noleaks=True):
+ def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True):
assert id == self.client_id
self.shell.cd(self.gluster_root)
env = "DEBUG=1 "
- if valgrind:
+ if valgrind == "memcheck" or valgrind == "yes":
cmd = "valgrind"
cmd += " --tool=memcheck --leak-check=full --track-origins=yes"
cmd += " --show-leak-kinds=all -v prove -v"
+ elif valgrind == "drd":
+ cmd = "valgrind"
+ cmd += " --tool=drd -v prove -v"
elif asan_noleaks:
cmd = "prove -v"
env += "ASAN_OPTIONS=detect_leaks=0 "
@@ -827,8 +830,9 @@ parser.add_argument("--port", help="server port to listen",
type=int, default=DEFAULT_PORT)
# test role
parser.add_argument("--tester", help="start tester", action="store_true")
-parser.add_argument("--valgrind", help="run tests under valgrind",
- action="store_true")
+parser.add_argument("--valgrind[=memcheck,drd]",
+ help="run tests with valgrind tool 'memcheck' or 'drd'",
+ default="no")
parser.add_argument("--asan", help="test with asan enabled",
action="store_true")
parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks",
diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
index 93fc8be5136..f91e8b6b8f7 100644
--- a/extras/ganesha/ocf/ganesha_nfsd
+++ b/extras/ganesha/ocf/ganesha_nfsd
@@ -36,7 +36,7 @@ else
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
fi
-OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage"
+OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage"
: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
ganesha_meta_data() {
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index a6814b17935..9790a719e10 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0)
HA_NUM_SERVERS=0
HA_SERVERS=""
HA_VOL_NAME="gluster_shared_storage"
-HA_VOL_MNT="/var/run/gluster/shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
SERVICE_MAN="DISTRO_NOT_FOUND"
diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
index 885ed03ad5b..1f2564b44ff 100755
--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
@@ -79,9 +79,9 @@ done
if [ "$option" == "disable" ]; then
# Unmount the volume on all the nodes
- umount /var/run/gluster/shared_storage
- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp
- mv /var/run/gluster/fstab.tmp /etc/fstab
+ umount /run/gluster/shared_storage
+ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
fi
if [ "$is_originator" == 1 ]; then
@@ -104,8 +104,15 @@ function check_volume_status()
echo $status
}
-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
- /var/run/gluster/shared_storage"
+key=`echo $5 | cut -d '=' -f 1`
+val=`echo $5 | cut -d '=' -f 2`
+if [ "$key" == "transport.address-family" ]; then
+ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \
+ $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage"
+else
+ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+ /run/gluster/shared_storage"
+fi
if [ "$option" == "enable" ]; then
retry=0;
@@ -120,10 +127,10 @@ if [ "$option" == "enable" ]; then
status=$(check_volume_status)
done
# Mount the volume on all the nodes
- umount /var/run/gluster/shared_storage
- mkdir -p /var/run/gluster/shared_storage
+ umount /run/gluster/shared_storage
+ mkdir -p /run/gluster/shared_storage
$mount_cmd
- cp /etc/fstab /var/run/gluster/fstab.tmp
- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp
- mv /var/run/gluster/fstab.tmp /etc/fstab
+ cp /etc/fstab /run/gluster/fstab.tmp
+ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
fi
diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
index 90ba6bc73a5..7ad6f23ad06 100755
--- a/extras/hook-scripts/start/post/S31ganesha-start.sh
+++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
@@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:"
VOL=
declare -i EXPORT_ID
ganesha_key="ganesha.enable"
-GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha"
+GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha"
CONF1="$GANESHA_DIR/ganesha.conf"
GLUSTERD_WORKDIR=
diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
index 174f2a2a44c..e62f7fc52a3 100755
--- a/extras/quota/quota_fsck.py
+++ b/extras/quota/quota_fsck.py
@@ -156,12 +156,10 @@ def get_quota_xattr_brick(dpath):
xattr_dict = {}
xattr_dict['parents'] = {}
- for xattr in pairs:
+ for xattr in pairs[1:]:
+ xattr = xattr.decode("utf-8")
xattr_key = xattr.split("=")[0]
- if re.search("# file:", xattr_key):
- # skip the file comment
- continue
- elif xattr_key is "":
+ if xattr_key == "":
# skip any empty lines
continue
elif not re.search("quota", xattr_key):
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
index cc16310e31d..0e4df77d481 100755
--- a/extras/snap_scheduler/gcron.py
+++ b/extras/snap_scheduler/gcron.py
@@ -19,10 +19,10 @@ import logging.handlers
import fcntl
-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
+GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag"
-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/"
+LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/"
log = logging.getLogger("gcron-logger")
start_time = 0.0
diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
index 5a29d41c8f8..e8fcc449a9b 100755
--- a/extras/snap_scheduler/snap_scheduler.py
+++ b/extras/snap_scheduler/snap_scheduler.py
@@ -67,7 +67,7 @@ except ImportError:
SCRIPT_NAME = "snap_scheduler"
scheduler_enabled = False
log = logging.getLogger(SCRIPT_NAME)
-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage"
+SHARED_STORAGE_DIR="/run/gluster/shared_storage"
GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled"
GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled"
GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks"
diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
index 11e57fdf54f..9688c79fab7 100644
--- a/geo-replication/gsyncd.conf.in
+++ b/geo-replication/gsyncd.conf.in
@@ -123,7 +123,7 @@ type=bool
help=Use this to set Active Passive mode to meta-volume.
[meta-volume-mnt]
-value=/var/run/gluster/shared_storage
+value=/run/gluster/shared_storage
help=Meta Volume or Shared Volume mount path
[allow-network]
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index b3f3fd8a936..a3df103e76c 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -725,7 +725,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
if not isinstance(realpath, int):
basename = os.path.basename(realpath).rstrip('\x00')
dirpath = os.path.dirname(realpath)
- if dirpath is "/":
+ if dirpath == "/":
pargfid = ROOT_GFID
else:
dirpath = dirpath.strip("/")
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index cf4ab55fb2b..b6d63146e14 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -706,6 +706,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release}
Requires: %{name}-cli%{?_isa} = %{version}-%{release}
Requires: libglusterfs0%{?_isa} = %{version}-%{release}
Requires: libgfchangelog0%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 0.1.0-2
+%endif
# some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
# self-heal daemon, rebalance, nfs-server etc. are actually clients
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 804461ca9df..a0a778158d8 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -14,6 +14,7 @@ gf_attach_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/api/src/libgfapi.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la
+gf_attach_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h glusterfsd-messages.h
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
index c54b4f9b516..c553b0b1f61 100644
--- a/glusterfsd/src/gf_attach.c
+++ b/glusterfsd/src/gf_attach.c
@@ -99,7 +99,7 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
iov.iov_len = ret;
/* Wait for connection */
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
ts.tv_sec += CONNECT_TIMEOUT;
pthread_mutex_lock(&rpc->conn.lock);
{
@@ -124,7 +124,7 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
0, iobref, frame, NULL, 0, NULL, 0, NULL);
if (!ret) {
/* OK, wait for callback */
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
ts.tv_sec += REPLY_TIMEOUT;
pthread_mutex_lock(&mutex);
{
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 793f27ab67f..eaf6796e4c3 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -629,7 +629,7 @@ glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
goto out;
}
- time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
+ time = gf_tvdiff(&begin, &end);
throughput = total_blks / time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
@@ -685,7 +685,7 @@ glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
goto out;
}
- time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
+ time = gf_tvdiff(&begin, &end);
throughput = total_blks / time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index f36ce2b0ffc..dae41f33fef 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1687,6 +1687,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
INIT_LIST_HEAD(&cmd_args->xlator_options);
INIT_LIST_HEAD(&cmd_args->volfile_servers);
+ ctx->pxl_count = 0;
+ pthread_mutex_init(&ctx->fd_lock, NULL);
+ pthread_cond_init(&ctx->fd_cond, NULL);
+ INIT_LIST_HEAD(&ctx->janitor_fds);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
@@ -2079,8 +2083,8 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
if (((ret == 0) &&
(S_ISREG(stbuf.st_mode) || S_ISLNK(stbuf.st_mode))) ||
(ret == -1)) {
- /* Have separate logfile per run */
- gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT);
+ /* Have separate logfile per run. */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
sprintf(tmp_logfile, "%s.%s.%d", cmd_args->log_file, timestr,
getpid());
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index ef53dfc0ccb..bf4b47f8760 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -1055,6 +1055,10 @@ glfsh_set_heal_options(glfs_t *fs, gf_xl_afr_op_t heal_op)
if (ret)
goto out;
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "halo-enabled", "off");
+ if (ret)
+ goto out;
+
if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) &&
(heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) &&
(heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME))
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index c4b0e9bab08..385e8ef4600 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -12,7 +12,8 @@ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
-DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
-I$(CONTRIBDIR)/xxhash
-libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL)
+libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \
+ $(URCU_LIBS) $(URCU_CDS_LIBS)
libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
-export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index c37c45449f2..682cbf28055 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -577,8 +577,14 @@ struct dnscache *
gf_dnscache_init(time_t ttl)
{
struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache);
- if (cache) {
- cache->cache_dict = NULL;
+ if (!cache)
+ return NULL;
+
+ cache->cache_dict = dict_new();
+ if (!cache->cache_dict) {
+ GF_FREE(cache);
+ cache = NULL;
+ } else {
cache->ttl = ttl;
}
@@ -586,6 +592,20 @@ gf_dnscache_init(time_t ttl)
}
/**
+ * gf_dnscache_deinit -- cleanup resources used by struct dnscache
+ */
+void
+gf_dnscache_deinit(struct dnscache *cache)
+{
+ if (!cache) {
+ gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL");
+ return;
+ }
+ dict_unref(cache->cache_dict);
+ GF_FREE(cache);
+}
+
+/**
* gf_dnscache_entry_init -- Initialize a dnscache entry
*
* @return: SUCCESS: Pointer to an allocated dnscache entry struct
@@ -633,12 +653,6 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
if (!dnscache)
goto out;
- if (!dnscache->cache_dict) {
- dnscache->cache_dict = dict_new();
- if (!dnscache->cache_dict) {
- goto out;
- }
- }
cache = dnscache->cache_dict;
/* Quick cache lookup to see if we already hold it */
@@ -646,7 +660,7 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
if (entrydata) {
dnsentry = (struct dnscache_entry *)entrydata->data;
/* First check the TTL & timestamp */
- if (time(NULL) - dnsentry->timestamp > dnscache->ttl) {
+ if (gf_time() - dnsentry->timestamp > dnscache->ttl) {
gf_dnscache_entry_deinit(dnsentry);
entrydata->data = NULL; /* Mark this as 'null' so
* dict_del () doesn't try free
@@ -683,7 +697,7 @@ out:
if (entry) {
entry->fqdn = fqdn;
entry->ip = gf_strdup(ip);
- entry->timestamp = time(NULL);
+ entry->timestamp = gf_time();
entrydata = bin_to_data(entry, sizeof(*entry));
dict_set(cache, (char *)ip, entrydata);
}
@@ -935,7 +949,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx)
{
/* Dump the timestamp of the crash too, so the previous logs
can be related */
- gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: ");
gf_msg_plain_nomem(GF_LOG_ALERT, timestr);
}
@@ -3119,7 +3133,7 @@ get_mem_size()
memsize = page_size * num_pages;
#endif
-#if defined GF_DARWIN_HOST_OS
+#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__
size_t len = sizeof(memsize);
int name[] = {CTL_HW, HW_PHYSMEM};
@@ -4133,6 +4147,14 @@ gf_skip_header_section(int fd, int header_len)
gf_boolean_t
gf_is_pid_running(int pid)
{
+#ifdef __FreeBSD__
+ int ret = -1;
+
+ ret = sys_kill(pid, 0);
+ if (ret < 0) {
+ return _gf_false;
+ }
+#else
char fname[32] = {
0,
};
@@ -4146,6 +4168,7 @@ gf_is_pid_running(int pid)
}
sys_close(fd);
+#endif
return _gf_true;
}
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
index 4a001c29209..3d890b04ec9 100644
--- a/libglusterfs/src/ctx.c
+++ b/libglusterfs/src/ctx.c
@@ -37,8 +37,12 @@ glusterfs_ctx_new()
ctx->log.loglevel = DEFAULT_LOG_LEVEL;
-#ifdef RUN_WITH_VALGRIND
- ctx->cmd_args.valgrind = _gf_true;
+#if defined(RUN_WITH_MEMCHECK)
+ ctx->cmd_args.vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ ctx->cmd_args.vgtool = _gf_drd;
+#else
+ ctx->cmd_args.vgtool = _gf_none;
#endif
/* lock is never destroyed! */
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 917737a943e..1d9be9217a6 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -1495,32 +1495,13 @@ fail:
* -val error, val = errno
*/
-int
-dict_get_with_ref(dict_t *this, char *key, data_t **data)
-{
- if (!this || !key || !data) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "dict OR key (%s) is NULL", key);
- return -EINVAL;
- }
-
- return dict_get_with_refn(this, key, strlen(key), data);
-}
-
-int
+static int
dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
{
data_pair_t *pair = NULL;
int ret = -ENOENT;
uint32_t hash;
- if (!this || !key || !data) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "dict OR key (%s) is NULL", key);
- ret = -EINVAL;
- goto err;
- }
-
hash = (uint32_t)XXH64(key, keylen, 0);
LOCK(&this->lock);
@@ -1533,10 +1514,22 @@ dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
}
}
UNLOCK(&this->lock);
-err:
+
return ret;
}
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data)
+{
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ return -EINVAL;
+ }
+
+ return dict_get_with_refn(this, key, strlen(key), data);
+}
+
static int
data_to_ptr_common(data_t *data, void **val)
{
@@ -1710,7 +1703,7 @@ dict_get_int8(dict_t *this, char *key, int8_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1756,7 +1749,7 @@ dict_get_int16(dict_t *this, char *key, int16_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1828,7 +1821,7 @@ dict_get_int32(dict_t *this, char *key, int32_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1893,7 +1886,7 @@ dict_get_int64(dict_t *this, char *key, int64_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1938,7 +1931,7 @@ dict_get_uint16(dict_t *this, char *key, uint16_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1983,7 +1976,7 @@ dict_get_uint32(dict_t *this, char *key, uint32_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -2028,7 +2021,7 @@ dict_get_uint64(dict_t *this, char *key, uint64_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -2251,7 +2244,7 @@ dict_get_double(dict_t *this, char *key, double *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -2334,7 +2327,7 @@ dict_get_ptr(dict_t *this, char *key, void **ptr)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !ptr) {
+ if (!ptr) {
ret = -EINVAL;
goto err;
}
@@ -2364,7 +2357,7 @@ dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !ptr) {
+ if (!ptr) {
ret = -EINVAL;
goto err;
}
@@ -2422,7 +2415,7 @@ dict_get_str(dict_t *this, char *key, char **str)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !str) {
+ if (!str) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2596,7 +2589,7 @@ dict_get_bin(dict_t *this, char *key, void **bin)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !bin) {
+ if (!bin) {
goto err;
}
@@ -2699,7 +2692,7 @@ dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !gfid) {
+ if (!gfid) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2732,7 +2725,7 @@ dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !mdata) {
+ if (!mdata) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2770,7 +2763,7 @@ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !iatt) {
+ if (!iatt) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index 235128b6044..402c253ca25 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -17,6 +17,7 @@
#include <string.h>
#include "glusterfs/gf-event.h"
+#include "glusterfs/timespec.h"
#include "glusterfs/common-utils.h"
#include "glusterfs/libglusterfs-messages.h"
#include "glusterfs/syscall.h"
@@ -266,7 +267,7 @@ gf_event_dispatch_destroy(struct event_pool *event_pool)
if (sys_write(fd[1], "dummy", 6) == -1) {
break;
}
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex,
&sleep_till);
diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
index 6d1e3836477..33157549897 100644
--- a/libglusterfs/src/events.c
+++ b/libglusterfs/src/events.c
@@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
char *host = NULL;
struct addrinfo hints;
struct addrinfo *result = NULL;
+ struct addrinfo *iter_result_ptr = NULL;
xlator_t *this = THIS;
char *volfile_server_transport = NULL;
@@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
goto out;
}
- /* Initialize UDP socket */
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- ret = EVENT_ERROR_SOCKET;
- goto out;
- }
-
if (ctx) {
volfile_server_transport = ctx->cmd_args.volfile_server_transport;
}
@@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
}
/* host = NULL returns localhost */
- host = NULL;
if (ctx && ctx->cmd_args.volfile_server &&
(strcmp(volfile_server_transport, "unix"))) {
/* If it is client code then volfile_server is set
@@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
goto out;
}
+ // iterate over the result and break when socket creation is success.
+ for (iter_result_ptr = result; iter_result_ptr != NULL;
+ iter_result_ptr = iter_result_ptr->ai_next) {
+ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype,
+ iter_result_ptr->ai_protocol);
+ if (sock != -1) {
+ break;
+ }
+ }
+ /*
+ * If none of the addrinfo structures lead to a successful socket
+ * creation, socket creation has failed.
+ */
+ if (sock < 0) {
+ ret = EVENT_ERROR_SOCKET;
+ goto out;
+ }
+
va_start(arguments, fmt);
ret = gf_vasprintf(&msg, fmt, arguments);
va_end(arguments);
@@ -93,7 +104,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
goto out;
}
- ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg);
+ ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg);
GF_FREE(msg);
if (ret <= 0) {
ret = EVENT_ERROR_MSG_FORMAT;
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
index 40fcffbb35e..64a93802f76 100644
--- a/libglusterfs/src/gidcache.c
+++ b/libglusterfs/src/gidcache.c
@@ -10,6 +10,7 @@
#include "glusterfs/gidcache.h"
#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
/*
* We treat this as a very simple set-associative LRU cache, with entries aged
@@ -64,7 +65,7 @@ gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid)
time_t now;
const gid_list_t *agl;
- now = time(NULL);
+ now = gf_time();
LOCK(&cache->gc_lock);
bucket = id % cache->gc_nbuckets;
agl = BUCKET_START(cache->gc_cache, bucket);
@@ -132,7 +133,7 @@ gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
if (!cache->gc_max_age)
return 0;
- now = time(NULL);
+ now = gf_time();
LOCK(&cache->gc_lock);
/*
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
index f05bda1f537..f297fdab5c9 100644
--- a/libglusterfs/src/glusterfs/common-utils.h
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -255,6 +255,8 @@ list_node_del(struct list_node *node);
struct dnscache *
gf_dnscache_init(time_t ttl);
+void
+gf_dnscache_deinit(struct dnscache *cache);
struct dnscache_entry *
gf_dnscache_entry_init(void);
void
@@ -1213,4 +1215,42 @@ gf_syncfs(int fd);
int
gf_nanosleep(uint64_t nsec);
+static inline time_t
+gf_time(void)
+{
+ return time(NULL);
+}
+
+/* Return delta value in microseconds. */
+
+static inline double
+gf_tvdiff(struct timeval *start, struct timeval *end)
+{
+ struct timeval t;
+
+ if (start->tv_usec > end->tv_usec)
+ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e6 +
+ (double)(t.tv_usec - start->tv_usec);
+}
+
+/* Return delta value in nanoseconds. */
+
+static inline double
+gf_tsdiff(struct timespec *start, struct timespec *end)
+{
+ struct timespec t;
+
+ if (start->tv_nsec > end->tv_nsec)
+ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
+ (double)(t.tv_nsec - start->tv_nsec);
+}
+
#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
index d4bf9b4f1a7..d0467c6dfb6 100644
--- a/libglusterfs/src/glusterfs/dict.h
+++ b/libglusterfs/src/glusterfs/dict.h
@@ -25,9 +25,6 @@ typedef struct _data_pair data_pair_t;
#define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value)
-#define dict_get_with_ref_sizen(this, key, value) \
- dict_get_with_refn(this, key, SLEN(key), value)
-
#define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key))
#define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key))
@@ -138,6 +135,7 @@ int32_t
dict_set(dict_t *this, char *key, data_t *value);
int32_t
dict_setn(dict_t *this, char *key, const int keylen, data_t *value);
+
/* function to set a new key/value pair (without checking for duplicate) */
int32_t
dict_add(dict_t *this, char *key, data_t *value);
@@ -145,8 +143,6 @@ int32_t
dict_addn(dict_t *this, char *key, const int keylen, data_t *value);
int
dict_get_with_ref(dict_t *this, char *key, data_t **data);
-int
-dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data);
data_t *
dict_get(dict_t *this, char *key);
data_t *
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index e4dfda8d01a..e6425618b7f 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -466,6 +466,8 @@ typedef struct _server_cmdline server_cmdline_t;
#define GF_OPTION_DISABLE _gf_false
#define GF_OPTION_DEFERRED 2
+typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool;
+
struct _cmd_args {
/* basic options */
char *volfile_server;
@@ -558,7 +560,8 @@ struct _cmd_args {
/* Run this process with valgrind? Might want to prevent calling
* functions that prevent valgrind from working correctly, like
* dlclose(). */
- int valgrind;
+ gf_valgrind_tool vgtool;
+
int localtime_logging;
/* For the subdir mount */
@@ -730,6 +733,13 @@ struct _glusterfs_ctx {
} stats;
struct list_head volfile_list;
+ /* Add members to manage janitor threads for cleanup fd */
+ struct list_head janitor_fds;
+ pthread_cond_t fd_cond;
+ pthread_mutex_t fd_lock;
+ pthread_t janitor;
+ /* The variable is use to save total posix xlator count */
+ uint32_t pxl_count;
char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
};
diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h
index ed47b1f0cbc..4d601bbcbd6 100644
--- a/libglusterfs/src/glusterfs/latency.h
+++ b/libglusterfs/src/glusterfs/latency.h
@@ -11,13 +11,23 @@
#ifndef __LATENCY_H__
#define __LATENCY_H__
-#include "glusterfs/glusterfs.h"
+#include <inttypes.h>
+#include <time.h>
-typedef struct fop_latency {
- double min; /* min time for the call (microseconds) */
- double max; /* max time for the call (microseconds) */
- double total; /* total time (microseconds) */
+typedef struct _gf_latency {
+ uint64_t min; /* min time for the call (nanoseconds) */
+ uint64_t max; /* max time for the call (nanoseconds) */
+ uint64_t total; /* total time (nanoseconds) */
uint64_t count;
-} fop_latency_t;
+} gf_latency_t;
+gf_latency_t *
+gf_latency_new(size_t n);
+
+void
+gf_latency_reset(gf_latency_t *lat);
+
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end);
#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
index 0fd1214e27d..e5b3276d047 100644
--- a/libglusterfs/src/glusterfs/mem-pool.h
+++ b/libglusterfs/src/glusterfs/mem-pool.h
@@ -202,6 +202,24 @@ out:
return dup_mem;
}
+#ifdef GF_DISABLE_MEMPOOL
+
+/* No-op memory pool enough to fit current API without massive redesign. */
+
+struct mem_pool {
+ unsigned long sizeof_type;
+};
+
+#define mem_pools_init() \
+ do { \
+ } while (0)
+#define mem_pools_fini() \
+ do { \
+ } while (0)
+#define mem_pool_thread_destructor(pool_list) (void)pool_list
+
+#else /* !GF_DISABLE_MEMPOOL */
+
/* kind of 'header' for the actual mem_pool_shared structure, this might make
* it possible to dump some more details in a statedump */
struct mem_pool {
@@ -210,10 +228,10 @@ struct mem_pool {
unsigned long count; /* requested pool size (unused) */
char *name;
char *xl_name;
- gf_atomic_t active; /* current allocations */
+ gf_atomic_t active; /* current allocations */
#ifdef DEBUG
- gf_atomic_t hit; /* number of allocations served from pt_pool */
- gf_atomic_t miss; /* number of std allocs due to miss */
+ gf_atomic_t hit; /* number of allocations served from pt_pool */
+ gf_atomic_t miss; /* number of std allocs due to miss */
#endif
struct list_head owner; /* glusterfs_ctx_t->mempool_list */
glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */
@@ -287,6 +305,10 @@ void
mem_pools_init(void); /* start the pool_sweeper thread */
void
mem_pools_fini(void); /* cleanup memory pools */
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+#endif /* GF_DISABLE_MEMPOOL */
struct mem_pool *
mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
@@ -309,9 +331,6 @@ void
mem_pool_destroy(struct mem_pool *pool);
void
-mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
-
-void
gf_mem_acct_enable_set(void *ctx);
#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
index 36cf7820ad5..d45d5b68c91 100644
--- a/libglusterfs/src/glusterfs/mem-types.h
+++ b/libglusterfs/src/glusterfs/mem-types.h
@@ -133,6 +133,7 @@ enum gf_common_mem_types_ {
gf_common_volfile_t,
gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
gf_common_mt_server_cmdline_t, /* used only in one location */
+ gf_common_mt_latency_t,
gf_common_mt_end
};
#endif
diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
index 17585508a22..536a330d38b 100644
--- a/libglusterfs/src/glusterfs/stack.h
+++ b/libglusterfs/src/glusterfs/stack.h
@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
xlator_t *this, int32_t op_ret, int32_t op_errno,
...);
+void
+gf_frame_latency_update(call_frame_t *frame);
+
struct call_pool {
union {
struct list_head all_frames;
@@ -149,8 +152,6 @@ struct _call_stack {
} while (0);
struct xlator_fops;
-void
-gf_update_latency(call_frame_t *frame);
static inline void
FRAME_DESTROY(call_frame_t *frame)
@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame)
void *local = NULL;
if (frame->root->ctx->measure_latency)
- gf_update_latency(frame);
+ gf_frame_latency_update(frame);
list_del_init(&frame->frames);
if (frame->local) {
@@ -429,6 +430,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
if (ngrps <= SMALL_GROUP_COUNT) {
stack->groups = stack->groups_small;
} else {
+ GF_FREE(stack->groups_large);
stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t),
gf_common_mt_groups_t);
if (!stack->groups_large)
@@ -442,6 +444,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
}
static inline int
+call_stack_groups_capacity(call_stack_t *stack)
+{
+ return max(stack->ngrps, SMALL_GROUP_COUNT);
+}
+
+static inline int
call_frames_count(call_stack_t *call_stack)
{
call_frame_t *pos;
diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h
index 89d04f94587..ce082706bdf 100644
--- a/libglusterfs/src/glusterfs/statedump.h
+++ b/libglusterfs/src/glusterfs/statedump.h
@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
void
gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
index c8be544e164..a1f70c7b840 100644
--- a/libglusterfs/src/glusterfs/store.h
+++ b/libglusterfs/src/glusterfs/store.h
@@ -95,7 +95,7 @@ int32_t
gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter);
+gf_store_iter_destroy(gf_store_iter_t **iter);
char *
gf_store_strerror(gf_store_op_errno_t op_errno);
diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h
index 91e921aea50..b6d3ab4f2ad 100644
--- a/libglusterfs/src/glusterfs/syscall.h
+++ b/libglusterfs/src/glusterfs/syscall.h
@@ -266,4 +266,13 @@ ssize_t
sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
size_t len, unsigned int flags);
+int
+sys_kill(pid_t pid, int sig);
+
+#ifdef __FreeBSD__
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen);
+#endif
+
#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 23004ab2245..4fd3abdaeff 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -805,7 +805,7 @@ struct _xlator {
struct {
/* for latency measurement */
- fop_latency_t latencies[GF_FOP_MAXVALUE];
+ gf_latency_t latencies[GF_FOP_MAXVALUE];
/* for latency measurement */
fop_metrics_t metrics[GF_FOP_MAXVALUE];
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 15b397c3799..ce4b0e8255d 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -16,35 +16,32 @@
#include "glusterfs/glusterfs.h"
#include "glusterfs/statedump.h"
-void
-gf_update_latency(call_frame_t *frame)
+gf_latency_t *
+gf_latency_new(size_t n)
{
- double elapsed;
- struct timespec *begin, *end;
-
- fop_latency_t *lat;
-
- begin = &frame->begin;
- end = &frame->end;
+ int i = 0;
+ gf_latency_t *lat = NULL;
- if (!(begin->tv_sec && end->tv_sec))
- goto out;
+ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
+ if (!lat)
+ return NULL;
- elapsed = (end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec);
+ for (i = 0; i < n; i++) {
+ gf_latency_reset(lat + i);
+ }
+ return lat;
+}
- if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
- gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
- frame->op);
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end)
+{
+ if (!(begin->tv_sec && end->tv_sec)) {
+ /*Measure latency might have been enabled/disabled during the op*/
return;
}
- /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
- set it right anyways for those frames */
- if (!frame->op)
- frame->op = frame->root->op;
-
- lat = &frame->this->stats.interval.latencies[frame->op];
+ double elapsed = gf_tsdiff(begin, end);
if (lat->max < elapsed)
lat->max = elapsed;
@@ -54,42 +51,34 @@ gf_update_latency(call_frame_t *frame)
lat->total += elapsed;
lat->count++;
-out:
- return;
}
void
-gf_proc_dump_latency_info(xlator_t *xl)
+gf_latency_reset(gf_latency_t *lat)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i;
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
- gf_proc_dump_add_section("%s", key_prefix);
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
-
- fop_latency_t *lat = &xl->stats.interval.latencies[i];
+ if (!lat)
+ return;
+ memset(lat, 0, sizeof(*lat));
+ lat->min = ULLONG_MAX;
+ /* make sure 'min' is set to high value, so it would be
+ properly set later */
+}
- /* Doesn't make sense to continue if there are no fops
- came in the given interval */
- if (!lat->count)
- continue;
+void
+gf_frame_latency_update(call_frame_t *frame)
+{
+ gf_latency_t *lat;
+ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+ set it right anyways for those frames */
+ if (!frame->op)
+ frame->op = frame->root->op;
- gf_proc_dump_write(
- key, "AVG:%.03f,CNT:%" PRId64 ",TOTAL:%.03f,MIN:%.03f,MAX:%.03f",
- (lat->total / lat->count), lat->count, lat->total, lat->min,
- lat->max);
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+ frame->op);
+ return;
}
- memset(xl->stats.interval.latencies, 0,
- sizeof(xl->stats.interval.latencies));
-
- /* make sure 'min' is set to high value, so it would be
- properly set later */
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- xl->stats.interval.latencies[i].min = 0xffffffff;
- }
+ lat = &frame->this->stats.interval.latencies[frame->op];
+ gf_latency_update(lat, &frame->begin, &frame->end);
}
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 00a6e484df6..5f18cd56cbe 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -585,6 +585,7 @@ gf_dirent_free
gf_dirent_orig_offset
gf_dm_hashfn
gf_dnscache_init
+gf_dnscache_deinit
gf_errno_to_error
gf_error_to_errno
_gf_event
@@ -1080,6 +1081,8 @@ sys_write
sys_writev
sys_socket
sys_accept
+sys_kill
+sys_sysctl
tbf_init
tbf_throttle
timespec_now
@@ -1182,4 +1185,9 @@ xlator_is_cleanup_starting
gf_nanosleep
gf_syncfs
graph_total_client_xlator
-get_xattrs_to_heal \ No newline at end of file
+get_xattrs_to_heal
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 1a87d277cc9..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -362,6 +362,30 @@ free:
FREE(ptr);
}
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ struct mem_pool *new;
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->sizeof_type = sizeof_type;
+ return new;
+}
+
+void
+mem_pool_destroy(struct mem_pool *pool)
+{
+ GF_FREE(pool);
+}
+
+#else /* !GF_DISABLE_MEMPOOL */
+
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head pool_threads;
static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -371,7 +395,6 @@ static size_t pool_list_size;
static __thread per_thread_pool_list_t *thread_pool_list = NULL;
-#if !defined(GF_DISABLE_MEMPOOL)
#define N_COLD_LISTS 1024
#define POOL_SWEEP_SECS 30
@@ -617,21 +640,29 @@ mem_pools_fini(void)
pthread_mutex_unlock(&init_mutex);
}
-#else
void
-mem_pools_init(void)
-{
-}
-void
-mem_pools_fini(void)
-{
-}
-void
-mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
+mem_pool_destroy(struct mem_pool *pool)
{
-}
+ if (!pool)
+ return;
-#endif
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
+}
struct mem_pool *
mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
@@ -700,21 +731,6 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
return new;
}
-void *
-mem_get0(struct mem_pool *mem_pool)
-{
- void *ptr = mem_get(mem_pool);
- if (ptr) {
-#if defined(GF_DISABLE_MEMPOOL)
- memset(ptr, 0, mem_pool->sizeof_type);
-#else
- memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
-#endif
- }
-
- return ptr;
-}
-
per_thread_pool_list_t *
mem_get_pool_list(void)
{
@@ -823,6 +839,23 @@ mem_get_from_pool(struct mem_pool *mem_pool)
return retval;
}
+#endif /* GF_DISABLE_MEMPOOL */
+
+void *
+mem_get0(struct mem_pool *mem_pool)
+{
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
+#if defined(GF_DISABLE_MEMPOOL)
+ memset(ptr, 0, mem_pool->sizeof_type);
+#else
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+
+ return ptr;
+}
+
void *
mem_get(struct mem_pool *mem_pool)
{
@@ -897,27 +930,3 @@ mem_put(void *ptr)
}
#endif /* GF_DISABLE_MEMPOOL */
}
-
-void
-mem_pool_destroy(struct mem_pool *pool)
-{
- if (!pool)
- return;
-
- /* remove this pool from the owner (glusterfs_ctx_t) */
- LOCK(&pool->ctx->lock);
- {
- list_del(&pool->owner);
- }
- UNLOCK(&pool->ctx->lock);
-
- /* free this pool, but keep the mem_pool_shared */
- GF_FREE(pool);
-
- /*
- * Pools are now permanent, so the mem_pool->pool is kept around. All
- * of the objects *in* the pool will eventually be freed via the
- * pool-sweeper thread, and this way we don't have to add a lot of
- * reference-counting complexity.
- */
-}
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
index 45e3d776903..fbb68dc8622 100644
--- a/libglusterfs/src/monitoring.c
+++ b/libglusterfs/src/monitoring.c
@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd)
dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
gf_fop_list[index], cbk);
}
- if (xl->stats.interval.latencies[index].count != 0.0) {
+ if (xl->stats.interval.latencies[index].count != 0) {
dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
gf_fop_list[index],
- (xl->stats.interval.latencies[index].total /
+ (((double)xl->stats.interval.latencies[index].total) /
xl->stats.interval.latencies[index].count));
- dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].max);
- dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].min);
}
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 655317f3ef1..65f0eb5c7f3 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -199,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...)
return ret;
}
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
+{
+ /* Doesn't make sense to continue if there are no fops
+ came in the given interval */
+ if (!lat || !lat->count)
+ return;
+ gf_proc_dump_write(key,
+ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
+ " MAX:%" PRIu64,
+ (((double)lat->total) / lat->count), lat->count,
+ lat->total, lat->min, lat->max);
+ gf_latency_reset(lat);
+}
+
+void
+gf_proc_dump_xl_latency_info(xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+
+ gf_latency_t *lat = &xl->stats.interval.latencies[i];
+
+ gf_latency_statedump_and_reset(key, lat);
+ }
+}
+
static void
gf_proc_dump_xlator_mem_info(xlator_t *xl)
{
@@ -349,26 +383,13 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict)
void
gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
{
+#ifdef GF_DISABLE_MEMPOOL
+ gf_proc_dump_write("built with --disable-mempool", " so no memory pools");
+#else
struct mem_pool *pool = NULL;
gf_proc_dump_add_section("mempool");
-#if defined(OLD_MEM_POOLS)
- list_for_each_entry(pool, &ctx->mempool_list, global_list)
- {
- gf_proc_dump_write("-----", "-----");
- gf_proc_dump_write("pool-name", "%s", pool->name);
- gf_proc_dump_write("hot-count", "%d", pool->hot_count);
- gf_proc_dump_write("cold-count", "%d", pool->cold_count);
- gf_proc_dump_write("padded_sizeof", "%lu", pool->padded_sizeof_type);
- gf_proc_dump_write("alloc-count", "%" PRIu64, pool->alloc_count);
- gf_proc_dump_write("max-alloc", "%d", pool->max_alloc);
-
- gf_proc_dump_write("pool-misses", "%" PRIu64, pool->pool_misses);
- gf_proc_dump_write("cur-stdalloc", "%d", pool->curr_stdalloc);
- gf_proc_dump_write("max-stdalloc", "%d", pool->max_stdalloc);
- }
-#else
LOCK(&ctx->lock);
{
list_for_each_entry(pool, &ctx->mempool_list, owner)
@@ -388,15 +409,13 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
}
}
UNLOCK(&ctx->lock);
-
- /* TODO: details of (struct mem_pool_shared) pool->pool */
-#endif
+#endif /* GF_DISABLE_MEMPOOL */
}
void
gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
{
-#if defined(OLD_MEM_POOLS)
+#ifndef GF_DISABLE_MEMPOOL
struct mem_pool *pool = NULL;
char key[GF_DUMP_MAX_BUF_LEN] = {
0,
@@ -407,51 +426,47 @@ gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
if (!ctx || !dict)
return;
- list_for_each_entry(pool, &ctx->mempool_list, global_list)
+ LOCK(&ctx->lock);
{
- snprintf(key, sizeof(key), "pool%d.name", count);
- ret = dict_set_str(dict, key, pool->name);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.hotcount", count);
- ret = dict_set_int32(dict, key, pool->hot_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.coldcount", count);
- ret = dict_set_int32(dict, key, pool->cold_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.paddedsizeof", count);
- ret = dict_set_uint64(dict, key, pool->padded_sizeof_type);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.alloccount", count);
- ret = dict_set_uint64(dict, key, pool->alloc_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.max_alloc", count);
- ret = dict_set_int32(dict, key, pool->max_alloc);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.max-stdalloc", count);
- ret = dict_set_int32(dict, key, pool->max_stdalloc);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.pool-misses", count);
- ret = dict_set_uint64(dict, key, pool->pool_misses);
- if (ret)
- return;
- count++;
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
+
+ snprintf(key, sizeof(key), "pool%d.name", count);
+ ret = dict_set_str(dict, key, pool->name);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.active-count", count);
+ ret = dict_set_uint64(dict, key, active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.sizeof-type", count);
+ ret = dict_set_uint64(dict, key, pool->sizeof_type);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.padded-sizeof", count);
+ ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.size", count);
+ ret = dict_set_uint64(dict, key,
+ (1 << pool->pool->power_of_two) * active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.shared-pool", count);
+ ret = dict_set_static_ptr(dict, key, pool->pool);
+ if (ret)
+ goto out;
+ }
}
- ret = dict_set_int32(dict, "mempool-count", count);
-#endif
+out:
+ UNLOCK(&ctx->lock);
+#endif /* !GF_DISABLE_MEMPOOL */
}
void
@@ -486,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav)
return;
if (ctx->measure_latency)
- gf_proc_dump_latency_info(trav);
+ gf_proc_dump_xl_latency_info(trav);
gf_proc_dump_xlator_mem_info(trav);
@@ -843,7 +858,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
? dump_options.dump_path
: ((ctx->statedump_path != NULL) ? ctx->statedump_path
: DEFAULT_VAR_RUN_DIRECTORY)),
- brick_name, getpid(), (uint64_t)time(NULL));
+ brick_name, getpid(), (uint64_t)gf_time());
if ((ret < 0) || (ret >= sizeof(path))) {
goto out;
}
@@ -1030,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
{
gf_dump_strfd = strfd;
- gf_proc_dump_latency_info(this);
+ gf_proc_dump_xl_latency_info(this);
gf_dump_strfd = NULL;
}
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index 74cf5459c3c..5c316b9291a 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -649,24 +649,24 @@ out:
}
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter)
+gf_store_iter_destroy(gf_store_iter_t **iter)
{
int32_t ret = -1;
- if (!iter)
+ if (!(*iter))
return 0;
/* gf_store_iter_new will not return a valid iter object with iter->file
* being NULL*/
- ret = fclose(iter->file);
+ ret = fclose((*iter)->file);
if (ret)
gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
"Unable"
" to close file: %s, ret: %d",
- iter->filepath, ret);
+ (*iter)->filepath, ret);
- GF_FREE(iter);
- iter = NULL;
+ GF_FREE(*iter);
+ *iter = NULL;
return ret;
}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index 95bea675379..df20cec559f 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -593,7 +593,7 @@ syncenv_task(struct syncproc *proc)
env->procs_idle++;
- sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME;
+ sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME;
ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
env->procs_idle--;
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index 007e3e84eea..04400f98b6c 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -13,6 +13,10 @@
#include "glusterfs/mem-pool.h"
#include "glusterfs/libglusterfs-messages.h"
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <signal.h>
+#endif
#include <sys/types.h>
#include <utime.h>
#include <sys/time.h>
@@ -506,7 +510,7 @@ sys_lsetxattr(const char *path, const char *name, const void *value,
#endif
#ifdef GF_BSD_HOST_OS
- return FS_RET_CHECK0(
+ return FS_RET_CHECK(
extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
errno);
#endif
@@ -624,7 +628,7 @@ sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
#endif
#ifdef GF_BSD_HOST_OS
- return FS_RET_CHECK0(
+ return FS_RET_CHECK(
extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
errno);
#endif
@@ -854,3 +858,19 @@ sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
#endif /* HAVE_COPY_FILE_RANGE_SYS */
#endif /* HAVE_COPY_FILE_RANGE */
}
+
+#ifdef __FreeBSD__
+int
+sys_kill(pid_t pid, int sig)
+{
+ return FS_RET_CHECK0(kill(pid, sig), errno);
+}
+
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen)
+{
+ return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen),
+ errno);
+}
+#endif
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 539da7f4716..9a2582d45d5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -260,6 +260,7 @@ xlator_dynload_apis(xlator_t *xl)
void *handle = NULL;
volume_opt_list_t *vol_opt = NULL;
xlator_api_t *xlapi = NULL;
+ int i = 0;
handle = xl->dlhandle;
@@ -357,6 +358,10 @@ xlator_dynload_apis(xlator_t *xl)
memcpy(xl->op_version, xlapi->op_version,
sizeof(uint32_t) * GF_MAX_RELEASES);
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_latency_reset(&xl->stats.interval.latencies[i]);
+ }
+
ret = 0;
out:
return ret;
@@ -821,7 +826,7 @@ xlator_members_free(xlator_t *xl)
GF_FREE(xl->name);
GF_FREE(xl->type);
- if (!(xl->ctx && xl->ctx->cmd_args.valgrind) && xl->dlhandle)
+ if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle)
dlclose(xl->dlhandle);
if (xl->options)
dict_unref(xl->options);
diff --git a/rfc.sh b/rfc.sh
index 8ab2f41b570..e7faec9ea0f 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -4,7 +4,29 @@
# i.e. where we are interested in the result of a command,
# we have to run the command in an if-statement.
-ORIGIN=${GLUSTER_ORIGIN:-origin}
+UPSTREAM=${GLUSTER_UPSTREAM}
+if [ "x$UPSTREAM" -eq "x" ]; then
+ for rmt in $(git remote); do
+ rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }');
+ if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then
+ UPSTREAM=$rmt
+ echo "Picked $rmt as upstream remote"
+ break
+ fi
+ done
+fi
+
+USER_REPO=${GLUSTER_USER_REPO:-origin}
+if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then
+ echo "When you submit patches, it should get submitted to your fork, not to upstream directly"
+ echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`"
+ echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`"
+ echo ""
+ echo "Exiting..."
+ exit 1
+fi
+
+
while getopts "v" opt; do
case $opt in
@@ -18,7 +40,7 @@ done
shift $((OPTIND-1))
-branch="master";
+branch="devel";
set_hooks_commit_msg()
{
@@ -50,21 +72,21 @@ is_num()
backport_id_message()
{
echo ""
- echo "This commit is to a non-master branch, and hence is treated as a backport."
+ echo "This commit is to a non-devel branch, and hence is treated as a backport."
echo ""
echo "For backports we would like to retain the same gerrit Change-Id across"
echo "branches. On auto inspection it is found that a gerrit Change-Id is"
- echo "missing, or the Change-Id is not found on your local master"
+ echo "missing, or the Change-Id is not found on your local devel branch"
echo ""
echo "This could mean a few things:"
echo " 1. This is not a backport, hence choose Y on the prompt to proceed"
- echo " 2. Your $ORIGIN master is not up to date, hence the script is unable"
- echo " to find the corresponding Change-Id on master. Either choose N,"
+ echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable"
+ echo " to find the corresponding Change-Id on devel. Either choose N,"
echo " 'git fetch', and try again, OR if you are sure you used the"
echo " same Change-Id, choose Y at the prompt to proceed"
echo " 3. You commented or removed the Change-Id in your commit message after"
echo " cherry picking the commit. Choose N, fix the commit message to"
- echo " use the same Change-Id as master (git commit --amend), resubmit"
+ echo " use the same Change-Id as 'devel' (git commit --amend), resubmit"
echo ""
}
@@ -72,8 +94,8 @@ check_backport()
{
moveon='N'
- # Backports are never made to master
- if [ $branch = "master" ]; then
+ # Backports are never made to 'devel'
+ if [ $branch = "devel" ]; then
return;
fi
@@ -86,22 +108,22 @@ check_backport()
echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): "
read moveon
else
- # Search master for the same change ID (rebase_changes has run, so we
+ # Search 'devel' for the same change ID (rebase_changes has run, so we
# should never not find a Change-Id)
- mchangeid=$(git log $ORIGIN/master --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
+ mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
- # Check if we found the change ID on master, else throw a message to
+ # Check if we found the change ID on 'devel', else throw a message to
# decide if we should continue.
- # NOTE: If master was not rebased, we will not find the Change-ID and
+ # NOTE: If 'devel' was not rebased, we will not find the Change-ID and
# could hit a false positive case here (or if someone checks out some
- # other branch as master).
+ # other branch as 'devel').
if [ "${mchangeid}" = "${changeid}" ]; then
moveon="Y"
else
backport_id_message;
echo "Change-Id of commit: $changeid"
- echo "Change-Id on master: $mchangeid"
- echo -n "Did not find mentioned Change-Id on master for a possible backport. Continue (y/N): "
+ echo "Change-Id on devel: $mchangeid"
+ echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): "
read moveon
fi
fi
@@ -116,7 +138,7 @@ check_backport()
rebase_changes()
{
- GIT_EDITOR=$0 git rebase -i $ORIGIN/$branch;
+ GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch;
}
@@ -212,7 +234,7 @@ EOF
assert_diverge()
{
- git diff $ORIGIN/$branch..HEAD | grep -q .;
+ git diff $UPSTREAM/$branch..HEAD | grep -q .;
}
@@ -258,7 +280,7 @@ main()
return;
fi
- git fetch $ORIGIN;
+ git fetch $UPSTREAM;
rebase_changes;
@@ -269,9 +291,9 @@ main()
# see note above variable REFRE for regex elaboration
reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
- # If this is a commit against master and does not have a github
+ # If this is a commit against 'devel' and does not have a github
# issue reference. Warn the contributor that one of the 2 is required
- if [ -z "${reference}" ] && [ $branch = "master" ]; then
+ if [ -z "${reference}" ] && [ $branch = "devel" ]; then
warn_reference_missing;
fi
@@ -299,9 +321,9 @@ main()
fi
if [ -z "${reference}" ]; then
- $drier git push $ORIGIN HEAD:refs/for/$branch/rfc;
+ $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s);
else
- $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference};
+ $drier git push $USER_REPO HEAD:issue${reference}_${branch};
fi
}
diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
index 54d1be1112f..e026d80259b 100644
--- a/rpc/rpc-lib/src/libgfrpc.sym
+++ b/rpc/rpc-lib/src/libgfrpc.sym
@@ -65,3 +65,4 @@ rpc_transport_unix_options_build
rpc_transport_unref
rpc_clnt_mgmt_pmap_signout
rpcsvc_autoscale_threads
+rpcsvc_statedump
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 7275d7568b6..0cb5862e9a9 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -309,6 +309,7 @@ enum glusterd_mgmt_v3_procnum {
GLUSTERD_MGMT_V3_PRE_VALIDATE,
GLUSTERD_MGMT_V3_BRICK_OP,
GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_COMMIT,
GLUSTERD_MGMT_V3_POST_VALIDATE,
GLUSTERD_MGMT_V3_UNLOCK,
GLUSTERD_MGMT_V3_MAXVALUE,
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.c b/rpc/rpc-lib/src/rpc-clnt-ping.c
index 2298ef6394f..31f17841bea 100644
--- a/rpc/rpc-lib/src/rpc-clnt-ping.c
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.c
@@ -122,7 +122,7 @@ rpc_clnt_ping_timer_expired(void *rpc_ptr)
goto out;
}
- clock_gettime(CLOCK_REALTIME, &current);
+ timespec_now_realtime(&current);
pthread_mutex_lock(&conn->lock);
{
unref = rpc_clnt_remove_ping_timer_locked(rpc);
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index 2b0ec295525..517037c4a5d 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -919,7 +919,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_MSG_RECEIVED: {
- clock_gettime(CLOCK_REALTIME, &conn->last_received);
+ timespec_now_realtime(&conn->last_received);
pollin = data;
if (pollin->is_reply)
@@ -933,8 +933,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_MSG_SENT: {
- clock_gettime(CLOCK_REALTIME, &conn->last_sent);
-
+ timespec_now_realtime(&conn->last_sent);
ret = 0;
break;
}
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 4297af4609c..39910d481bf 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -13,6 +13,7 @@
#include <glusterfs/dict.h>
#include <glusterfs/byte-order.h>
#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "xdr-rpc.h"
#include <glusterfs/iobuf.h>
#include "xdr-common.h"
@@ -341,14 +342,12 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
goto err;
}
+ if (svc->xl->ctx->measure_latency) {
+ timespec_now(&req->begin);
+ }
+
req->ownthread = program->ownthread;
req->synctask = program->synctask;
- if (((req->procnum == GFS3_OP_RELEASE) ||
- (req->procnum == GFS3_OP_RELEASEDIR)) &&
- (program->prognum == GLUSTER_FOP_PROGRAM)) {
- req->ownthread = _gf_false;
- req->synctask = _gf_true;
- }
err = SUCCESS;
gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
@@ -1496,10 +1495,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
size_t hdrlen = 0;
char new_iobref = 0;
rpcsvc_drc_globals_t *drc = NULL;
+ gf_latency_t *lat = NULL;
if ((!req) || (!req->trans))
return -1;
+ if (req->prog && req->begin.tv_sec) {
+ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
+ timespec_now(&req->end);
+ lat = &req->prog->latencies[req->procnum];
+ gf_latency_update(lat, &req->begin, &req->end);
+ }
+ }
trans = req->trans;
for (i = 0; i < hdrcount; i++) {
@@ -1830,6 +1837,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
iobref);
}
+void
+rpcsvc_program_destroy(rpcsvc_program_t *program)
+{
+ if (program) {
+ GF_FREE(program->latencies);
+ GF_FREE(program);
+ }
+}
+
int
rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
{
@@ -1887,8 +1903,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
ret = 0;
out:
- if (prog)
- GF_FREE(prog);
+ rpcsvc_program_destroy(prog);
if (ret == -1) {
if (program) {
@@ -2273,6 +2288,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
}
memcpy(newprog, program, sizeof(*program));
+ newprog->latencies = gf_latency_new(program->numactors);
+ if (!newprog->latencies) {
+ rpcsvc_program_destroy(newprog);
+ goto out;
+ }
INIT_LIST_HEAD(&newprog->program);
pthread_mutexattr_init(&thr_attr);
@@ -3218,6 +3238,48 @@ out:
return ret;
}
+void
+rpcsvc_program_dump(rpcsvc_program_t *prog)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-number");
+ gf_proc_dump_write(key, "%d", prog->prognum);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-version");
+ gf_proc_dump_write(key, "%d", prog->progver);
+
+ strncat(key_prefix, ".latency",
+ sizeof(key_prefix) - strlen(key_prefix) - 1);
+
+ for (i = 0; i < prog->numactors; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
+ gf_latency_statedump_and_reset(key, &prog->latencies[i]);
+ }
+}
+
+void
+rpcsvc_statedump(rpcsvc_t *svc)
+{
+ rpcsvc_program_t *prog = NULL;
+ int ret = 0;
+ ret = pthread_rwlock_tryrdlock(&svc->rpclock);
+ if (ret)
+ return;
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ rpcsvc_program_dump(prog);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+}
+
static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
[GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0},
[GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0},
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index c370b330572..7b3030926c8 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -264,6 +264,8 @@ struct rpcsvc_request {
gf_boolean_t ownthread;
gf_boolean_t synctask;
+ struct timespec begin; /*req handling start time*/
+ struct timespec end; /*req handling end time*/
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
@@ -419,6 +421,7 @@ struct rpcsvc_program {
supported by the system. */
/* Program specific state handed to actors */
void *private;
+ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
/* This upcall is provided by the program during registration.
* It is used to notify the program about events like connection being
@@ -686,4 +689,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
extern int
rpcsvc_destroy(rpcsvc_t *svc);
+void
+rpcsvc_statedump(rpcsvc_t *svc);
#endif
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 65a41d93493..ed8b473be23 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -2950,6 +2950,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
socket_dump_info(sa, priv->is_server, priv->use_ssl, priv->sock,
this->name, "disconnecting from");
+ /* Dump the SSL error stack to clear any errors that may otherwise
+ * resurface in the future.
+ */
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
+
/* Logging has happened already in earlier cases */
gf_log("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
"EPOLLERR - disconnecting (sock:%d) (%s)", priv->sock,
diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
index 02ebec26c01..b631dea3502 100644
--- a/rpc/xdr/src/glusterd1-xdr.x
+++ b/rpc/xdr/src/glusterd1-xdr.x
@@ -202,6 +202,21 @@ struct gd1_mgmt_v3_commit_rsp {
string op_errstr<>;
} ;
+struct gd1_mgmt_v3_post_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
struct gd1_mgmt_v3_post_val_req {
unsigned char uuid[16];
int op;
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
index dd4ac8562bc..8fa0e0ddd8a 100644
--- a/rpc/xdr/src/libgfxdr.sym
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -28,6 +28,8 @@ xdr_gd1_mgmt_v3_brick_op_req
xdr_gd1_mgmt_v3_brick_op_rsp
xdr_gd1_mgmt_v3_commit_req
xdr_gd1_mgmt_v3_commit_rsp
+xdr_gd1_mgmt_v3_post_commit_req
+xdr_gd1_mgmt_v3_post_commit_rsp
xdr_gd1_mgmt_v3_lock_req
xdr_gd1_mgmt_v3_lock_rsp
xdr_gd1_mgmt_v3_post_val_req
diff --git a/run-tests.sh b/run-tests.sh
index dc25d8cca31..e2a1655d8e0 100755
--- a/run-tests.sh
+++ b/run-tests.sh
@@ -328,6 +328,7 @@ function get_bug_list_for_disabled_test ()
function run_tests()
{
RES=0
+ FLAKY=''
FAILED=''
TESTS_NEEDED_RETRY=''
GENERATED_CORE=''
@@ -349,14 +350,15 @@ function run_tests()
timeout_cmd_exists="no"
fi
- for t in $(find ${regression_testsdir}/tests -name '*.t' \
- | LC_COLLATE=C sort) ; do
+ all_tests=($(find ${regression_testsdir}/tests -name '*.t' | sort))
+ all_tests_cnt=${#all_tests[@]}
+ for t in "${all_tests[@]}" ; do
old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
total_tests=$((total_tests+1))
if match $t "$@" ; then
selected_tests=$((selected_tests+1))
echo
- echo $section_separator$section_separator
+ echo $section_separator "(${total_tests} / ${all_tests_cnt})" $section_separator
if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \
[[ $skip_bad_tests == "yes" ]]
then
@@ -432,9 +434,17 @@ function run_tests()
TESTS_NEEDED_RETRY="${TESTS_NEEDED_RETRY}${t} "
fi
+
+
if [ ${TMP_RES} -ne 0 ] ; then
- RES=${TMP_RES}
- FAILED="${FAILED}${t} "
+ if [[ "$t" == *"tests/000-flaky/"* ]]; then
+ FLAKY="${FLAKY}${t} "
+ echo "FAILURE -> SUCCESS: Flaky test"
+ TMP_RES=0
+ else
+ RES=${TMP_RES}
+ FAILED="${FAILED}${t} "
+ fi
fi
new_cores=$(ls /*-*.core 2> /dev/null | wc -l)
@@ -469,8 +479,10 @@ function run_tests()
echo "$key - ${ELAPSEDTIMEMAP["$key"]} second"
done | sort -rn -k3
- # Output the errors into a file
+ # initialize the output file
echo > "${result_output}"
+
+ # Output the errors into a file
if [ ${RES} -ne 0 ] ; then
FAILED=$( echo ${FAILED} | tr ' ' '\n' | sort -u )
FAILED_COUNT=$( echo -n "${FAILED}" | grep -c '^' )
@@ -483,7 +495,13 @@ function run_tests()
TESTS_NEEDED_RETRY=$( echo ${TESTS_NEEDED_RETRY} | tr ' ' '\n' | sort -u )
RETRY_COUNT=$( echo -n "${TESTS_NEEDED_RETRY}" | grep -c '^' )
if [ ${RETRY_COUNT} -ne 0 ] ; then
- echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}"
+ echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" >> "${result_output}"
+ fi
+
+ FLAKY_TESTS_FAILED=$( echo ${FLAKY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${FLAKY_TESTS_FAILED}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} flaky test(s) marked as success even though they failed \n${FLAKY_TESTS_FAILED}" >> "${result_output}"
fi
echo
diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
index c268c125610..77d82a4996f 100644
--- a/tests/basic/afr/split-brain-favorite-child-policy.t
+++ b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
@@ -1,8 +1,8 @@
#!/bin/bash
#Test the split-brain resolution CLI commands.
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
cleanup;
diff --git a/tests/basic/changelog/changelog-snapshot.t b/tests/000-flaky/basic_changelog_changelog-snapshot.t
index 7742db48cdd..f6cd0b04d47 100644
--- a/tests/basic/changelog/changelog-snapshot.t
+++ b/tests/000-flaky/basic_changelog_changelog-snapshot.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
cleanup;
ROLLOVER_TIME=3
diff --git a/tests/basic/distribute/rebal-all-nodes-migrate.t b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
index acc4ffefecc..eb5d3305ac1 100644
--- a/tests/basic/distribute/rebal-all-nodes-migrate.t
+++ b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../cluster.rc
-. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../dht.rc
# Check if every single rebalance process migrated some files
@@ -10,11 +10,9 @@
function cluster_rebal_all_nodes_migrated_files {
val=0
a=$($CLI_1 volume rebalance $V0 status | grep "completed" | awk '{print $2}');
-# echo $a
b=($a)
for i in "${b[@]}"
do
-# echo "$i";
if [ "$i" -eq "0" ]; then
echo "false";
val=1;
diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
index 79f5825ae10..42808ce0c0e 100755..100644
--- a/tests/basic/ec/ec-quorum-count-partial-failure.t
+++ b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
#This test checks that partial failure of fop results in main fop failure only
cleanup;
diff --git a/tests/basic/mount-nfs-auth.t b/tests/000-flaky/basic_mount-nfs-auth.t
index 3d4a9cff00b..3d4a9cff00b 100755..100644
--- a/tests/basic/mount-nfs-auth.t
+++ b/tests/000-flaky/basic_mount-nfs-auth.t
diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
index dc9013061b0..5a88f97d726 100644
--- a/tests/bugs/core/multiplex-limit-issue-151.t
+++ b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../traps.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
function count_up_bricks {
$CLI --xml volume status all | grep '<status>1' | wc -l
diff --git a/tests/bugs/distribute/bug-1117851.t b/tests/000-flaky/bugs_distribute_bug-1117851.t
index 62cb6b66ab4..5980bf2fd4b 100755..100644
--- a/tests/bugs/distribute/bug-1117851.t
+++ b/tests/000-flaky/bugs_distribute_bug-1117851.t
@@ -2,8 +2,8 @@
SCRIPT_TIMEOUT=250
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
create_files () {
for i in {1..1000}; do
diff --git a/tests/bugs/distribute/bug-1122443.t b/tests/000-flaky/bugs_distribute_bug-1122443.t
index 906be7072bd..abd37082b33 100644
--- a/tests/bugs/distribute/bug-1122443.t
+++ b/tests/000-flaky/bugs_distribute_bug-1122443.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
make_files() {
mkdir $1 && \
@@ -42,8 +42,8 @@ TEST glusterfs -s $H0 --volfile-id $V0 $M0
TEST make_files $M0/subdir
# Get mtime before migration
-BEFORE="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')"
-
+BEFORE="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $BEFORE
# Migrate brick
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 start
@@ -51,9 +51,10 @@ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 commit
# Get mtime after migration
-EXPECT_WITHIN 5 RECONNECTED bug_1113050_workaround $M0/subdir/*
-AFTER="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')"
-
+EXPECT_WITHIN 30 RECONNECTED bug_1113050_workaround $M0/subdir/symlink
+sleep 3
+AFTER="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $AFTER
# Check if mtime is unchanged
TEST [ "$AFTER" == "$BEFORE" ]
diff --git a/tests/bugs/glusterd/bug-857330/common.rc b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
index d0aa4b1a640..bd122eff18c 100644
--- a/tests/bugs/glusterd/bug-857330/common.rc
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
@@ -1,4 +1,4 @@
-. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../include.rc
UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
diff --git a/tests/bugs/glusterd/bug-857330/normal.t b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
index ad0c8844fae..6c1cf54ec3c 100755
--- a/tests/bugs/glusterd/bug-857330/normal.t
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
@@ -1,7 +1,7 @@
#!/bin/bash
. $(dirname $0)/common.rc
-. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../volume.rc
cleanup;
TEST glusterd
@@ -14,7 +14,7 @@ TEST $CLI volume start $V0;
TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
-TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
--multi -b 10 -d 10 -n 10 $M0;
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
diff --git a/tests/bugs/glusterd/bug-857330/xml.t b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
index 8383d2a0711..11785adacdb 100755
--- a/tests/bugs/glusterd/bug-857330/xml.t
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
@@ -1,7 +1,7 @@
#!/bin/bash
. $(dirname $0)/common.rc
-. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../volume.rc
cleanup;
@@ -15,7 +15,7 @@ TEST $CLI volume start $V0;
TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
-TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
--multi -b 10 -d 10 -n 10 $M0;
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
index c701f08bbd5..a431b8c4fd4 100755..100644
--- a/tests/bugs/glusterd/quorum-value-check.t
+++ b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
@@ -1,7 +1,7 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/000-flaky/bugs_nfs_bug-1116503.t
index dd3998df150..fc50021acc7 100644
--- a/tests/bugs/nfs/bug-1116503.t
+++ b/tests/000-flaky/bugs_nfs_bug-1116503.t
@@ -3,9 +3,9 @@
# Verify that mounting NFS over UDP (MOUNT service only) works.
#
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
diff --git a/tests/features/lock-migration/lkmigration-set-option.t b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
index 4340438591f..1327ef3579f 100644
--- a/tests/features/lock-migration/lkmigration-set-option.t
+++ b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
@@ -1,7 +1,7 @@
#!/bin/bash
# Test to check
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
#Check lock-migration set option sanity
cleanup;
diff --git a/tests/afr.rc b/tests/afr.rc
index 5fc7fa1898d..241789903ba 100644
--- a/tests/afr.rc
+++ b/tests/afr.rc
@@ -115,3 +115,9 @@ function afr_private_key_value()
#xargs at the end will strip leading spaces
grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
}
+
+function afr_anon_entry_count()
+{
+ local b=$1
+ ls $b/.glusterfs-anonymous-inode* | wc -l
+}
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 00000000000..896ba0c9b2c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 00000000000..f4cf37a2fa0
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
+#so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 00000000000..7bb6ee14193
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
new file mode 100644
index 00000000000..256ee2aafce
--- /dev/null
+++ b/tests/basic/afr/rename-data-loss.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#Self-heal tests
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST `echo "line1" >> file1`
+TEST mkdir dir1
+TEST mkdir dir2
+TEST mkdir -p dir1/dira/dirb
+TEST `echo "line1">>dir1/dira/dirb/file1`
+TEST mkdir delete_me
+TEST `echo "line1" >> delete_me/file1`
+
+#brick0 has witnessed the second write while brick1 is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST `echo "line2" >> file1`
+TEST `echo "line2" >> dir1/dira/dirb/file1`
+TEST `echo "line2" >> delete_me/file1`
+
+#Toggle the bricks that are up/down.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/brick0
+
+#Rename when the 'source' brick0 for data-selfheals is down.
+mv file1 file2
+mv dir1/dira dir2
+
+#Delete a dir when brick0 is down.
+rm -rf delete_me
+cd -
+
+#Bring everything up and trigger heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
+
+#Remount to avoid reading from caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "line2" tail -1 $M0/file2
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
+TEST ! stat $M0/delete_me/file1
+TEST ! stat $M0/delete_me
+
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
+cleanup
diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
new file mode 100644
index 00000000000..7c249c4bcbd
--- /dev/null
+++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+#Test the client side split-brain resolution
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+
+count_files () {
+ ls $1 | wc -l
+}
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 1
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST mkdir $M0/data
+TEST touch $M0/data/file
+
+
+############ Client side healing using favorite-child-policy = mtime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+mtime1=$(get_mtime $B0/${V0}0/data/file)
+mtime2=$(get_mtime $B0/${V0}1/data/file)
+if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+else
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+fi
+
+#file will be in split-brain
+cat $M0/data/file > /dev/null
+EXPECT "1" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+cat $M0/data/file > /dev/null
+EXPECT "0" echo $?
+M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ]
+
+############ Client side directory conservative merge #################
+TEST $CLI volume reset $V0 cluster.favorite-child-policy
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/data/test
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/data/test1
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#data dir will be in entry split-brain
+ls $M0/data > /dev/null
+EXPECT "2" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+
+
+ls $M0/data > /dev/null
+EXPECT "0" echo $?
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+#Entry Split-brain is gone, but data self-heal is pending on the files
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+cat $M0/data/test > /dev/null
+cat $M0/data/test1 > /dev/null
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+files=$(count_files $M0/data)
+EXPECT "3" echo $files
+
+TEST force_umount $M0
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+cleanup
diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t
new file mode 100644
index 00000000000..6ec3e1fcbfa
--- /dev/null
+++ b/tests/basic/fuse/active-io-graph-switch.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=12
+
+function perform_io_on_mount {
+ local m="$1"
+ local f="$2"
+ local lockfile="$3"
+ while [ -f "$m/$lockfile" ];
+ do
+ dd if=/dev/zero of=$m/$f bs=1M count=1
+ done
+}
+
+function perform_graph_switch {
+ for i in {1..3}
+ do
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off
+ sleep 3
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on
+ sleep 3
+ done
+}
+
+function count_files {
+ ls $M0 | wc -l
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Repeat the tests with reader-thread-count
+TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+cleanup
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
new file mode 100644
index 00000000000..7beb8dd1fe4
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
@@ -0,0 +1,127 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *volfile,
+ const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile(fs, volfile);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *volfile, const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, volfile, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *volfile = NULL;
+ char *logfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ volfile = argv[3];
+ logfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, volfile, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
new file mode 100755
index 00000000000..8e94df9d321
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,off,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,on,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in
new file mode 100644
index 00000000000..cdc0c9d0671
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client-ssl.vol.in
@@ -0,0 +1,15 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+ option transport.socket.ssl-enabled @@SSL@@
+end-volume
+
diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c
new file mode 100644
index 00000000000..62b532b9ce4
--- /dev/null
+++ b/tests/basic/metadisp/fsyncdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDONLY | O_DIRECTORY);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (rename(argv[2], argv[3]) == (-1)) {
+ perror("rename");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/ftruncate.c b/tests/basic/metadisp/ftruncate.c
new file mode 100644
index 00000000000..c9185212c31
--- /dev/null
+++ b/tests/basic/metadisp/ftruncate.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDWR);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (ftruncate(pfd, 0) == (-1)) {
+ perror("ftruncate");
+ return EXIT_FAILURE;
+ }
+
+ if (write(pfd, "hello", 5) == (-1)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/fxattr.c b/tests/basic/metadisp/fxattr.c
new file mode 100644
index 00000000000..e552057778a
--- /dev/null
+++ b/tests/basic/metadisp/fxattr.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+static char MY_XATTR[] = "user.fxtest";
+static char *PROGRAM;
+#define CONSUME(v) \
+ do { \
+ if (!argc) { \
+ fprintf(stderr, "missing argument\n"); \
+ return EXIT_FAILURE; \
+ } \
+ v = argv[0]; \
+ ++argv; \
+ --argc; \
+ } while (0)
+
+static int
+do_get(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+ char buf[1024];
+
+ CONSUME(value);
+
+ ret = fgetxattr(fd, MY_XATTR, buf, sizeof(buf));
+ if (ret == (-1)) {
+ perror("fgetxattr");
+ return EXIT_FAILURE;
+ }
+
+ if (strncmp(buf, value, ret) != 0) {
+ fprintf(stderr, "data mismatch\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_set(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+
+ CONSUME(value);
+
+ ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0);
+ if (ret == (-1)) {
+ perror("fsetxattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_remove(int argc, char **argv, int fd)
+{
+ int ret;
+
+ ret = fremovexattr(fd, MY_XATTR);
+ if (ret == (-1)) {
+ perror("femovexattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ char *path;
+ char *cmd;
+
+ CONSUME(PROGRAM);
+ CONSUME(path);
+ CONSUME(cmd);
+
+ fd = open(path, O_RDWR);
+ if (fd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(cmd, "get") == 0) {
+ return do_get(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "set") == 0) {
+ return do_set(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "remove") == 0) {
+ return do_remove(argc, argv, fd);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c
new file mode 100644
index 00000000000..63578bc528f
--- /dev/null
+++ b/tests/basic/metadisp/gfs-fsetxattr.c
@@ -0,0 +1,141 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int gfapi = 1;
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *basename = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ if (argc != 5) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file> <basename>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+ basename = argv[4];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&dir1, "%s-dir", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "%s-file", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "%s-file-renamed", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+}
diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t
new file mode 100644
index 00000000000..894ffe07226
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.t
@@ -0,0 +1,316 @@
+#!/usr/bin/env bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+# Considering `--enable-metadisp` is an option for `./configure`,
+# which is disabled by default, this test will never pass regression.
+# But to see the value of this test, run below after configuring
+# with above option :
+# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t`
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST
+
+cleanup;
+
+TEST mkdir -p $B0/b0/{0,1}
+
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1
+
+TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0;
+
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+
+# each drive should get 40 files
+TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# now write some data to a file
+echo "hello" > $M0/3
+filename=$$
+echo "hello" > /tmp/metadisp-write-${filename}
+checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+
+# check that the backend file exists on b1
+gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3))
+TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) -eq 1 ]
+
+# check that the backend file matches the frontend
+TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ]
+
+# delete the file
+TEST rm $M0/3
+
+# ensure the frontend and backend files are cleaned up
+TEST ! -e $M0/3
+TEST ! [ stat $B0/b*/*/$gfid ]
+
+# Test TRUNCATE + WRITE flow
+echo "hello" | tee $M0/4
+echo "goo" | tee $M0/4
+filename=$$
+echo "goo" | tee /tmp/metadisp-truncate-${filename}
+checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+
+# cleanup all the other files.
+TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}}
+TEST rm $M0/such_rename
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE flow
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# Test UNLINK flow
+# No drives should have any files
+TEST rm -v $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE + WRITE + READ flow
+filename=$$
+dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10
+checksum=$(md5sum /tmp/${filename} | awk '{print $1}')
+TEST cp -v /tmp/${filename} $M0/1
+TEST cp -v /tmp/${filename} $M0/2
+TEST cp -v /tmp/${filename} $M0/3
+TEST cp -v /tmp/${filename} $M0/4
+TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test TRUNCATE + WRITE flow
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=20
+
+# Check that readdir stats the files properly and we get the correct sizes
+TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ];
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+# Still flaky, so disabled until it can be debugged.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+# Test rename over an existing file.
+ok=yes
+for i in $(seq 0 9); do
+ echo foo > $M0/src$i
+ echo bar > $M0/dst$i
+done
+for i in $(seq 0 9); do
+ mv $M0/src$i $M0/dst$i
+done
+for i in $(seq 0 9); do
+ nfiles=$(cat $B0/b0/*/dst$i | wc -l)
+ if [ "$nfiles" = "2" ]; then
+ echo "COLLISION on dst$i"
+ (ls -l $B0/b0/*/dst$i; cat $B0/b0/*/dst$i) | sed "/^/s// /"
+ ok=no
+ fi
+done
+EXPECT "yes" echo $ok
+
+# Test rename of a directory.
+count_copies () {
+ ls -d $B0/b?/?/$1 2> /dev/null | wc -l
+}
+TEST mkdir $M0/foo_dir
+EXPECT 1 count_copies foo_dir
+EXPECT 0 count_copies bar_dir
+TEST mv $M0/foo_dir $M0/bar_dir
+EXPECT 0 count_copies foo_dir
+EXPECT 1 count_copies bar_dir
+
+for x in $(seq 0 99); do
+ touch $M0/target$x
+ ln -s $M0/target$x $M0/link$x
+done
+on_0=$(ls $B0/b*/0/link* | wc -l)
+on_1=$(ls $B0/b*/1/link* | wc -l)
+TEST [ "$on_0" -eq 100 ]
+TEST [ "$on_1" -eq 0 ]
+TEST [ "$(ls -l $M0/link* | wc -l)" = 100 ]
+
+# Test (hard) link.
+_test_hardlink () {
+ local b
+ local has_src
+ local has_dst
+ local src_inum
+ local dst_inum
+ touch $M0/hardsrc$1
+ ln $M0/hardsrc$1 $M0/harddst$1
+ for b in $B0/b{0}/{0,1}; do
+ [ -f $b/hardsrc$1 ]; has_src=$?
+ [ -f $b/harddst$1 ]; has_dst=$?
+ if [ "$has_src" != "$has_dst" ]; then
+ echo "MISSING $b/hardxxx$1 $has_src $has_dst"
+ return
+ fi
+ if [ "$has_src$has_dst" = "00" ]; then
+ src_inum=$(stat -c '%i' $b/hardsrc$1)
+ dst_inum=$(stat -c '%i' $b/harddst$1)
+ if [ "$dst_inum" != "$src_inum" ]; then
+ echo "MISMATCH $b/hardxx$i $src_inum $dst_inum"
+ return
+ fi
+ fi
+ done
+ echo "OK"
+}
+
+test_hardlink () {
+ local result=$(_test_hardlink $*)
+ # [ "$result" = "OK" ] || echo $result > /dev/tty
+ echo $result
+}
+
+# Do this multiple times to make sure colocation isn't a fluke.
+EXPECT "OK" test_hardlink 0
+EXPECT "OK" test_hardlink 1
+EXPECT "OK" test_hardlink 2
+EXPECT "OK" test_hardlink 3
+EXPECT "OK" test_hardlink 4
+EXPECT "OK" test_hardlink 5
+EXPECT "OK" test_hardlink 6
+EXPECT "OK" test_hardlink 7
+EXPECT "OK" test_hardlink 8
+EXPECT "OK" test_hardlink 9
+
+# Test remove hardlink source. ensure deleting one file
+# doesn't delete the data unless link-count is 1
+TEST mkdir $M0/hardlink
+TEST touch $M0/hardlink/fileA
+echo "data" >> $M0/hardlink/fileA
+checksum=$(md5sum $M0/hardlink/fileA | awk '{print $1}')
+TEST ln $M0/hardlink/fileA $M0/hardlink/fileB
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 2 ]
+TEST rm $M0/hardlink/fileA
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 1 ]
+TEST [ "$(md5sum $M0/hardlink/fileB | awk '{print $1}')" == "$checksum" ]
+
+#
+# FIXME: statfs values look ok but the test is bad
+#
+# Test statfs. If we're doing it right, the numbers for the mountpoint should be
+# double those for the brick filesystem times the number of bricks,
+# but unless we're on a completely idle
+# system (which never happens) the numbers can change even while this function
+# runs and that would trip us up. Do a sloppy comparison to deal with that.
+#compare_fields () {
+# val1=$(df $1 | grep / | awk "{print \$$3}")
+# val2=$(df $2 | grep / | awk "{print \$$3}")
+# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ]
+#}
+
+#brick_df=$(df $B0 | grep /)
+#mount_df=$(df $M0 | grep /)
+#TEST compare_fields $B0 $M0 2 # Total blocks
+#TEST compare_fields $B0 $M0 3 # Used
+#TEST compare_fields $B0 $M0 4 # Available
+
+# Test removexattr.
+#RXATTR_FILE=$(get_file_not_on_disk0 rxtest)
+#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE
+#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+#TEST setfattr -x user.foo $M0/$RXATTR_FILE
+#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+
+# Test fsyncdir. We can't really test whether it's doing the right thing,
+# but we can test that it doesn't fail and we can hand-check that it's calling
+# down to all of the disks instead of just one.
+#
+# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if
+# other translators are handling it correctly?
+
+#FSYNCDIR_EXE=$(dirname $0)/fsyncdir
+#build_tester ${FSYNCDIR_EXE}.c
+#TEST touch $M0/fsyncdir_src
+#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst
+#TEST rm -f $FSYNCDIR_EXE
+
+# Test fsetxattr, fgetxattr, fremovexattr (in that order).
+FXATTR_FILE=$M0/fxfile1
+TEST touch $FXATTR_FILE
+FXATTR_EXE=$(dirname $0)/fxattr
+build_tester ${FXATTR_EXE}.c
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE set value1
+TEST getfattr -n user.fxtest $FXATTR_FILE
+TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE get value2
+TEST $FXATTR_EXE $FXATTR_FILE remove
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST rm -f $FXATTR_EXE
+
+# Test ftruncate
+FTRUNCATE_EXE=$(dirname $0)/ftruncate
+build_tester ${FTRUNCATE_EXE}.c
+FTRUNCATE_FILE=$M0/ftfile1
+TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB
+TEST $FTRUNCATE_EXE $FTRUNCATE_FILE
+#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1))
+
+# Test fallocate, discard, zerofill. Actually we don't so much check that these
+# *work* as that they don't throw any errors (especially ENOENT because the
+# file's not on disk zero).
+FALLOC_FILE=fatest1
+TEST touch $M0/$FALLOC_FILE
+TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE
+TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE
+# This actually fails with "operation not supported" on most filesystems, so
+# don't leave it enabled except to test changes.
+#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE
+
+#cleanup;
diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol
new file mode 100644
index 00000000000..58ae2f6f2a8
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.vol
@@ -0,0 +1,14 @@
+volume posix-0
+ type storage/posix
+ option directory /d/backends/b0/0
+end-volume
+
+volume posix-1
+ type storage/posix
+ option directory /d/backends/b0/1
+end-volume
+
+volume metadisp-0
+ type features/metadisp
+ subvolumes posix-0 posix-1
+end-volume
diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t
index d959f750ee0..a51b1bc7267 100755
--- a/tests/bugs/fuse/many-groups-for-acl.t
+++ b/tests/bugs/fuse/many-groups-for-acl.t
@@ -38,6 +38,13 @@ do
done
TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+# Linux < 3.8 exports only first 32 gids of pid to userspace
+kernel_exports_few_gids=0
+if [ "$OSTYPE" = Linux ] && \
+ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then
+ kernel_exports_few_gids=1
+fi
+
# preparation done, start the tests
TEST glusterd
@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off
TEST $CLI volume set ${V0} server.manage-gids off
TEST $CLI volume start ${V0}
+# This is just a synchronization hack to make sure the bricks are
+# up before going on.
EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
# mount the volume with POSIX ACL support, without --resolve-gids
@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ]
su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null"
TEST [ $? -eq 0 ]
-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null"
-TEST [ $? -ne 0 ]
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null"
+TEST [ $? -eq $kernel_exports_few_gids ]
su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
TEST [ $? -ne 0 ]
diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
index 29f0ed1fe4e..0be31dac768 100644
--- a/tests/bugs/glusterd/brick-order-check-add-brick.t
+++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
@@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Add-brick with Increasing replica count
+TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+#Add-brick with Increasing replica count from same host should fail
+TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3
+
+#adding multiple bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9}
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
cleanup
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
index c208112c8b0..011535066f9 100644
--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0
TEST $CLI volume profile $V0 start
TEST $CLI volume profile $V0 info clear
TEST $CLI volume heal $V0 enable
-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
# Check that a change in heal-timeout is honoured immediately.
TEST $CLI volume set $V0 cluster.heal-timeout 5
sleep 10
# Two crawls must have happened.
-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
# shd must not heal if it is disabled and heal-timeout is changed.
TEST $CLI volume heal $V0 disable
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 00000000000..bbe82c0e5b2
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/tests/features/trash.t b/tests/features/trash.t
index 472e909e567..da5b50bc85a 100755
--- a/tests/features/trash.t
+++ b/tests/features/trash.t
@@ -94,105 +94,105 @@ wildcard_not_exists() {
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
}
-# testing glusterd [1-3]
+# testing glusterd
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info
-# creating distributed volume [4]
+# creating distributed volume
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
-# checking volume status [5-7]
+# checking volume status
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '2' brick_count $V0
-# test without enabling trash translator [8]
+# test without enabling trash translator
TEST start_vol $V0 $M0
-# test on enabling trash translator [9-10]
+# test on enabling trash translator
TEST $CLI volume set $V0 features.trash on
EXPECT 'on' volinfo_field $V0 'features.trash'
-# files directly under mount point [11]
+# files directly under mount point
create_files $M0/file1 $M0/file2
TEST file_exists $V0 file1 file2
-# perform unlink [12]
+# perform unlink
TEST unlink_op file1
-# perform truncate [13]
+# perform truncate
TEST truncate_op file2 4
-# create files directory hierarchy and check [14]
+# create files directory hierarchy and check
mkdir -p $M0/1/2/3
create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
-# perform unlink [15]
+# perform unlink
TEST unlink_op 1/2/3/foo1
-# perform truncate [16]
+# perform truncate
TEST truncate_op 1/2/3/foo2 4
# create a directory for eliminate pattern
mkdir $M0/a
-# set the eliminate pattern [17-18]
+# set the eliminate pattern
TEST $CLI volume set $V0 features.trash-eliminate-path /a
EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
-# create two files and check [19]
+# create two files and check
create_files $M0/a/test1 $M0/a/test2
TEST file_exists $V0 a/test1 a/test2
-# remove from eliminate pattern [20]
+# remove from eliminate pattern
rm -f $M0/a/test1
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
-# truncate from eliminate path [21-23]
+# truncate from eliminate path
truncate -s 2 $M0/a/test2
TEST [ -e $M0/a/test2 ]
TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
-# set internal op on [24-25]
+# set internal op on
TEST $CLI volume set $V0 features.trash-internal-op on
EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
-# again create two files and check [26]
+# again create two files and check
create_files $M0/inop1 $M0/inop2
TEST file_exists $V0 inop1 inop2
-# perform unlink [27]
+# perform unlink
TEST unlink_op inop1
-# perform truncate [28]
+# perform truncate
TEST truncate_op inop2 4
-# remove one brick and restart the volume [28-31]
+# remove one brick and restart the volume
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
TEST start_vol $V0 $M0 $M0/.trashcan
-# again create two files and check [33]
+# again create two files and check
create_files $M0/rebal1 $M0/rebal2
TEST file_exists $V0 rebal1 rebal2
-# add one brick [34-35]
+# add one brick
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
TEST [ -d $B0/${V0}3 ]
-# perform rebalance [36]
+# perform rebalance
TEST $CLI volume rebalance $V0 start force
EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
#Find out which file was migrated to the new brick
file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
-# check whether rebalance was succesful [37-40]
+# check whether rebalance was succesful
EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
# force required in case rebalance is not over
TEST $CLI volume stop $V0 force
-# create a replicated volume [41]
+# create a replicated volume
TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
-# checking volume status [42-45]
+# checking volume status
EXPECT "$V1" volinfo_field $V1 'Volume Name'
EXPECT 'Replicate' volinfo_field $V1 'Type'
EXPECT 'Created' volinfo_field $V1 'Status'
EXPECT '2' brick_count $V1
-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
+# enable trash with options and start the replicate volume by disabling automatic self-heal
TEST $CLI volume set $V1 features.trash on
TEST $CLI volume set $V1 features.trash-internal-op on
EXPECT 'on' volinfo_field $V1 'features.trash'
EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
TEST start_vol $V1 $M1 $M1/.trashcan
-# mount and check for trash directory [51]
+# mount and check for trash directory
TEST [ -d $M1/.trashcan/internal_op ]
-# create a file and check [52]
+# create a file and check
touch $M1/self
TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
-# kill one brick and delete the file from mount point [53-54]
+# kill one brick and delete the file from mount point
kill_brick $V1 $H0 $B0/${V1}1
EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
rm -f $M1/self
EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
-# force start the volume and trigger the self-heal manually [55-57]
-TEST $CLI volume start $V1 force
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-# Since we created the file under root of the volume, it will be
-# healed automatically
-
-# check for the removed file in trashcan [58]
-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
-
-# check renaming of trash directory through cli [59-62]
+# check renaming of trash directory through cli
TEST $CLI volume set $V0 trash-dir abc
TEST start_vol $V0 $M0 $M0/abc
TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
-# ensure that rename and delete operation on trash directory fails [63-65]
+# ensure that rename and delete operation on trash directory fails
rm -rf $M0/abc/internal_op
TEST [ -e $M0/abc/internal_op ]
rm -rf $M0/abc/
diff --git a/tests/volume.rc b/tests/volume.rc
index 3924baeb7fc..b38848c0e52 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -999,4 +999,4 @@ function logging_time_check()
local logfile=`echo ${0##*/}`_glusterd1.log
cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l
-} \ No newline at end of file
+}
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 8dbdb572abd..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies)
@@ -1197,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -1214,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -2238,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
@@ -2823,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2833,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
+ *read_subvol = spb_subvol;
} else if (!priv->quorum_count ||
frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
@@ -3635,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3945,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -5627,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -6620,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f69013f3e0a..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index a5b004f4258..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 772b59f9a2f..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque)
xdata_rsp = NULL;
/* It doesn't. So query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f35c41df274..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index ac31751997f..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -16,54 +16,170 @@
#include <glusterfs/syncop-utils.h>
#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
- char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0};
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source,
+ iatt->ia_type, iatt_uuid_str, source,
uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
@@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -166,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
@@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index dd40c57ab12..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 7a038fa7fe3..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -369,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 2219a53b277..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -222,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
_mask_cancellation();
}
@@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
+ event->end_time = gf_time();
history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -843,6 +843,176 @@ out:
return need_heal;
}
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
if (ret == 0 && pre_crawl_xdata &&
!healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 687c28e6472..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 67c3e0699e6..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -1029,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1037,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1046,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
-{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
-
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
-
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
- }
-
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
-
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
-
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
-
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
- }
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
-
- return ret;
-}
-
static int
afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
@@ -1220,9 +1145,9 @@ afr_ta_post_op_do(void *opaque)
this = local->transaction.frame->this;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 67e0a4d10be..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
}
}
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
if (priv->shd.enabled) {
if ((priv->shd.enabled != enabled_old) ||
(timeout_old != priv->shd.timeout))
@@ -541,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -553,6 +580,9 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
@@ -561,7 +591,8 @@ init(xlator_t *this)
priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -1286,6 +1317,14 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 1fff5640940..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -42,6 +42,7 @@
#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -190,6 +191,7 @@ typedef struct _afr_private {
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
unsigned char *halo_child_up;
int64_t *child_latency;
@@ -275,10 +277,15 @@ typedef struct _afr_private {
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
/*For lock healing.*/
struct list_head saved_locks;
struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -1271,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1357,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame);
void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index be92236e3bd..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -382,7 +382,7 @@ out:
/* Code to save hashed subvol on inode ctx as a mds subvol
*/
-static int
+int
dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
{
dht_inode_ctx_t *ctx = NULL;
@@ -2161,31 +2161,18 @@ static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
-
- ret = dict_set_int32(dict, linktoskip_key, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
static int32_t
@@ -4314,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
@@ -4604,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dict_del(xattr, conf->xattr_name);
dict_del(xattr, conf->mds_xattr_key);
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
dict_del(xattr, conf->commithash_xattr_name);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
-
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
@@ -5893,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -6720,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
if (layout == NULL)
goto done;
@@ -10850,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10896,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -11090,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -11243,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more then once during a FOP
handling path. It is valid only for for ops on files
@@ -11299,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
@@ -11465,117 +11389,3 @@ dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
/* Returning the first xlator error as all xlators have errors */
return layout->list[0].err;
}
-
-/* Get brick paths from all the local subvols and store for use.
- *
- * TODO: Make sure newly added brick is not picked for migration.
- * Otherwise there will be no rebalance as directory entries won't be present
- * on a newly added brick */
-int
-dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc)
-{
- dict_t *dict = NULL;
- gf_defrag_info_t *defrag = conf->defrag;
- char *key = NULL;
- char *tmp = NULL;
- char *str = NULL;
- char *token;
- char *saveptr = NULL;
- int i = 1;
- int j = 0;
- int ret = 0;
-
- key = gf_strdup("glusterfs.pathinfo");
- if (!key) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
- "failed to allocate "
- "memory");
- ret = -1;
- goto out;
- }
-
- defrag->local_brick_paths = GF_CALLOC(conf->local_subvols_cnt,
- sizeof(*defrag->local_brick_paths),
- gf_common_mt_pointer);
-
- for (j = 0; j < conf->local_subvols_cnt; j++) {
- ret = syncop_getxattr(conf->local_subvols[j], loc, &dict, key, NULL,
- NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, 0,
- "failed to get path,"
- " errno %d",
- ret);
- /* TODO: We need not break out from here and can resume operation.
- * We need a place holder in gf_defrag_info_t to mark which
- * local_brick_paths we are working on. Right now, we blindly
- * take defrag->local_brick_path[0]. This can be dynamic based on
- * need */
- goto out;
- }
-
- str = NULL;
- ret = dict_get_str(dict, key, &str);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "dict get failed for :%s",
- key);
- goto out;
- }
- if (str == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "key:%s not found", key);
- ret = -1;
- goto out;
- }
-
- if (!defrag->is_pure_distribute) {
- tmp = strstr(str, "REPLICATE");
- if (tmp) {
- defrag->is_pure_distribute = _gf_false;
- break;
- }
-
- /*TODO: fetching glusterfs.pathinfo on erasure volume is failing.
- *Function the old way till we get it resolved */
- tmp = strstr(str, "ERASURE");
- if (tmp) {
- defrag->is_pure_distribute = _gf_false;
- break;
- }
-
- defrag->is_pure_distribute = _gf_true;
- }
-
- saveptr = NULL;
-
- for (token = strtok_r(str, ":", &saveptr), i = 1; token;) {
- token = strtok_r(NULL, ":", &saveptr);
- i++;
- if (i == 3) {
- token = strtok_r(token, ">", &saveptr);
- break;
- } else {
- continue;
- }
- }
-
- defrag->local_brick_paths[j] = gf_strdup(token);
- }
-
-out:
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_INFO, 0, 0,
- "failed to get brick path. "
- "Will operate old way");
- for (j = 0; j < conf->local_subvols_cnt; j++) {
- GF_FREE(defrag->local_brick_paths[j]);
- }
- defrag->is_pure_distribute = _gf_false;
- }
-
- if (defrag->is_pure_distribute) {
- gf_msg(this->name, GF_LOG_INFO, 0, 0, "volume type : pure distribute");
- }
-
- GF_FREE(key);
- return ret;
-}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 84891406c71..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -24,7 +24,6 @@
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -36,7 +35,6 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
@@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
loc_t loc;
loc_t loc2;
@@ -272,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -405,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
@@ -463,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- int percent_full;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- tier_pause_state_t pause_state;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 is this is my node's uuid*/
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -559,17 +467,10 @@ struct gf_defrag_info_ {
int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
+ time_t start_time;
uint32_t new_commit_hash;
gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -598,15 +499,6 @@ struct gf_defrag_info_ {
gf_boolean_t stats;
/* lock migration flag */
gf_boolean_t lock_migration_enabled;
-
- /* local system crawl */
- char **local_brick_paths;
-
- /* whether the volume is pure distribute */
- gf_boolean_t is_pure_distribute;
-
- /*TODO: Introduce a glusterd option to tune this behaviour*/
- gf_boolean_t operate_dist;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -614,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -635,7 +526,7 @@ struct dht_conf {
int subvolume_cnt;
int32_t refresh_interval;
gf_lock_t subvolume_lock;
- struct timeval last_stat_fetch;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
@@ -1325,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1492,5 +1380,5 @@ int
dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
int
-dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc);
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 27097ca2475..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index eda2491e0ff..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 33f9832395b..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -30,10 +30,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 026879e14af..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -96,15 +94,13 @@ GLFS_MSGID(
DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
- DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED,
- DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP,
- DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED,
- DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE,
- DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED,
- DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED,
- DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE,
- DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL,
- DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
@@ -180,7 +176,6 @@ GLFS_MSGID(
"adding bricks"
#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
-#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused"
#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
@@ -222,17 +217,14 @@ GLFS_MSGID(
#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
-#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed"
#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
-#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started"
#define DHT_MSG_FIX_NOT_COMP_STR \
"Unable to retrieve fixlayout xattr. Assume background fix layout not " \
"complete"
-#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr"
#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
"local subvolume determination failed with error"
#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
@@ -248,8 +240,6 @@ GLFS_MSGID(
#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
"Failed to initialise migration queue"
#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
-#define DHT_MSG_TIER_RESUME_STR "Pause end. Resume tiering"
-#define DHT_MSG_TIER_PAUSED_STR "Pause tiering"
#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
#define DHT_MSG_WOKE_STR "woken"
#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index d850eef62ab..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -14,7 +14,6 @@
#include <signal.h>
#include <glusterfs/events.h>
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
-#include <string.h>
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
@@ -610,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -1097,7 +1093,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
if (!xdata) {
xdata = dict_new();
if (!xdata) {
@@ -1537,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client using the trusted.distribute.migrate-data virtual xattr
- */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -2301,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2895,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3075,7 +3053,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
int ret = 0;
char is_linkfile = 0;
@@ -3279,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3292,6 +3270,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
+ old_THIS = THIS;
+ THIS = this;
+
dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
if (!dir_dfmeta) {
gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
@@ -3456,7 +3437,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
defrag, dir_dfmeta->lfd[dfc_index],
migrate_data, dir_dfmeta, xattr_req,
- &should_commit_hash, perrno);
+ perrno);
if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
goto out;
@@ -3500,24 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3525,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3539,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3585,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3712,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3815,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3841,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3905,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3923,7 +3777,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- if (ret && ret != 2) {
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3990,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -4010,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -4045,372 +3893,6 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- return ret;
-}
-
-int
-gf_defrag_fix_layout_puredist(xlator_t *this, gf_defrag_info_t *defrag,
- loc_t *loc, dict_t *fix_layout,
- dict_t *migrate_data)
-{
- int ret = -1;
- loc_t entry_loc = {
- 0,
- };
- fd_t *fd = NULL;
- inode_t *linked_inode = NULL, *inode = NULL;
- dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
- int perrno = 0;
- /* absolute brick path length */
- int brick_len = 0;
- /* dir path length (relative to gluster mount) */
- int dir_len = 0;
- /* absolute dir path length */
- int total_len = 0;
- struct dirent *entry = NULL;
- struct dirent scratch[2] = {{
- 0,
- }};
- DIR *dirp = NULL;
- int full_entry_length = 0;
- int entry_len = 0;
- char full_entry_path[4096] = {
- 0,
- };
- char full_dir_path[4096] = {
- 0,
- };
- ssize_t size = 0;
- uuid_t tmp_gfid;
- struct stat tmpbuf = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- struct stat lstatbuf = {
- 0,
- };
- struct iatt stbuf = {
- 0,
- };
-
- conf = this->private;
- if (!conf) {
- ret = -1;
- goto out;
- }
-
- /*
- * Since the primary target for the following lookup is to figure out if the
- * entry still exists, going to do a direct stat call rather than going
- * through the whole gluster stack. There are some benefits of doing gluster
- * lookup, but this is redundant since we have done already one gluster
- * lookup in the parent function.
- *
- * Randomly selecting the first local subvol to read, since it is expected
- * that the directory structure is present in all the subvols identically
- */
-
- brick_len = strlen(defrag->local_brick_paths[0]);
- /* discarding the first "/" */
- dir_len = strlen(loc->path) - 1;
- /* Extra two: one for "/" at the end and one more for '\0'*/
- total_len = brick_len + dir_len + 2;
-
- snprintf(full_dir_path, total_len, "%s%s/", defrag->local_brick_paths[0],
- loc->path + 1);
-
- ret = sys_lstat(full_dir_path, &tmpbuf);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR,
- "[absolutepath %s] directory "
- "not found, path %s error %d",
- full_dir_path, loc->path, errno);
- goto out;
- }
-
- dirp = sys_opendir(full_dir_path);
- if (!dirp) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, errno, 0, "failed to open dir : %s",
- loc->path);
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- goto out;
- }
-
- while ((entry = sys_readdir(dirp, scratch)) != NULL) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") ||
- !strcmp(entry->d_name, ".glusterfs"))
- continue;
-
- /* TODO: Need to add a check for _DIRENT_HAVE_D_TYPE flag to fall back
- to stat in case d_type is not defined */
- if (entry->d_type != DT_DIR) {
- continue;
- }
-
- entry_len = strlen(entry->d_name);
- full_entry_length = total_len + entry_len + 1; /* one more for "/"*/
-
- snprintf(full_entry_path, full_entry_length, "%s%s/", full_dir_path,
- entry->d_name);
-
- size = sys_lgetxattr(full_entry_path, GFID_XATTR_KEY, tmp_gfid, 16);
- if (size != 16) {
- gf_log(this->name, GF_LOG_ERROR, "gfid not found, path %s",
- full_entry_path);
- continue;
- }
-
- loc_wipe(&entry_loc);
-
- ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Child loc"
- " build failed for entry: %s",
- entry->d_name);
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
-
- goto out;
- } else {
- should_commit_hash = 0;
-
- continue;
- }
- }
-
- if (gf_uuid_is_null(tmp_gfid)) {
- gf_log(this->name, GF_LOG_ERROR,
- "%s/%s"
- " gfid not present",
- loc->path, entry->d_name);
- continue;
- }
-
- gf_uuid_copy(entry_loc.gfid, tmp_gfid);
-
- /*In case the gfid stored in the inode by inode_link
- *and the gfid obtained in the lookup differs, then
- *client3_3_lookup_cbk will return ESTALE and proper
- *error will be captured.
- */
- memset(&lstatbuf, 0, sizeof(struct stat));
- ret = sys_lstat(full_entry_path, &lstatbuf);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, 0, "lstat failed for %s",
- entry->d_name);
- }
-
- memset(&stbuf, 0, sizeof(struct iatt));
- iatt_from_stat(&stbuf, &lstatbuf);
- gf_uuid_copy(stbuf.ia_gfid, entry_loc.gfid);
- linked_inode = inode_link(entry_loc.inode, loc->inode, entry->d_name,
- &stbuf);
-
- inode = entry_loc.inode;
- entry_loc.inode = linked_inode;
- inode_unref(inode);
-
- if (gf_uuid_is_null(loc->gfid)) {
- gf_log(this->name, GF_LOG_ERROR,
- "%s/%s"
- " gfid not present",
- loc->path, entry->d_name);
- continue;
- }
-
- gf_uuid_copy(entry_loc.pargfid, loc->gfid);
-
- ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
- if (ret) {
- if (-ret == ENOENT || -ret == ESTALE) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
- "Dir:%s renamed or removed. "
- "Skipping",
- loc->path);
- ret = 0;
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- continue;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s",
- entry_loc.path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- ret = -1;
- goto out;
- } else {
- should_commit_hash = 0;
- continue;
- }
- }
- }
-
- /* A return value of 2 means, either process_dir or
- * lookup of a dir failed. Hence, don't commit hash
- * for the current directory*/
-
- ret = gf_defrag_fix_layout_puredist(this, defrag, &entry_loc,
- fix_layout, migrate_data);
-
- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
- goto out;
- }
-
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Fix layout failed for %s", entry_loc.path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
-
- goto out;
- } else {
- /* Let's not commit-hash if
- * gf_defrag_fix_layout failed*/
- continue;
- }
- }
- }
-
- ret = sys_closedir(dirp);
- if (ret) {
- gf_msg_debug(this->name, 0,
- "Failed to close dir %s. Reason :"
- " %s",
- full_dir_path, strerror(errno));
- ret = 0;
- }
-
- dirp = NULL;
-
- /* A directory layout is fixed only after its subdirs are healed to
- * any newly added bricks. If the layout is fixed before subdirs are
- * healed, the newly added brick will get a non-null layout.
- * Any subdirs which hash to that layout will no longer show up
- * in a directory listing until they are healed.
- */
-
- ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
-
- /* In case of a race where the directory is deleted just before
- * layout setxattr, the errors are updated in the layout structure.
- * We can use this information to make a decision whether the directory
- * is deleted entirely.
- */
- if (ret == 0) {
- ret = dht_dir_layout_error_check(this, loc->inode);
- ret = -ret;
- }
-
- if (ret) {
- if (-ret == ENOENT || -ret == ESTALE) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
- "Setxattr failed. Dir %s "
- "renamed or removed",
- loc->path);
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- ret = 0;
- goto out;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
- "Setxattr failed for %s", loc->path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- ret = -1;
- goto out;
- }
- }
- }
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
-
- if (ret && (ret != 2)) {
- if (perrno == ENOENT || perrno == ESTALE) {
- ret = 0;
- goto out;
- } else {
- defrag->total_failures++;
-
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
- "gf_defrag_process_dir failed for "
- "directory: %s",
- loc->path);
-
- if (conf->decommission_in_progress) {
- goto out;
- }
-
- should_commit_hash = 0;
- }
- } else if (ret == 2) {
- should_commit_hash = 0;
- }
- }
-
- gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
-
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
- defrag->total_failures++;
-
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
- "Settle hash failed for %s", loc->path);
-
- ret = -1;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto out;
- }
- }
-
- ret = 0;
-out:
- loc_wipe(&entry_loc);
-
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- if (dirp) {
- sys_closedir(dirp);
- }
-
return ret;
}
@@ -4419,31 +3901,26 @@ dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4534,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4546,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4621,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4678,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
(void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
@@ -4768,7 +4241,6 @@ gf_defrag_start_crawl(void *data)
pthread_t *tid = NULL;
pthread_t filecnt_thread;
gf_boolean_t fc_thread_started = _gf_false;
- int i = 0;
this = data;
if (!this)
@@ -4786,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
@@ -4903,12 +4376,6 @@ gf_defrag_start_crawl(void *data)
goto out;
}
- ret = dht_get_brick_paths(this, conf, &loc);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "could not get brick path");
- ret = 0;
- }
-
/* Initialise the structures required for parallel migration */
ret = gf_defrag_parallel_migration_init(this, defrag, &tid,
&thread_index);
@@ -4926,27 +4393,14 @@ gf_defrag_start_crawl(void *data)
}
}
- /* TODO: Need to introduce a flag to safely operate in the old way */
- if (defrag->operate_dist && defrag->is_pure_distribute) {
- ret = gf_defrag_fix_layout_puredist(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
defrag->total_failures++;
ret = -1;
goto out;
@@ -4988,14 +4442,6 @@ out:
}
UNLOCK(&defrag->lock);
- for (i = 0; i < conf->local_subvols_cnt; i++) {
- if (defrag->local_brick_paths[i]) {
- GF_FREE(defrag->local_brick_paths[i]);
- }
- }
-
- GF_FREE(defrag->local_brick_paths);
-
GF_FREE(defrag);
conf->defrag = NULL;
@@ -5069,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -5079,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
@@ -5130,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -5153,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
- gettimeofday(&end, NULL);
-
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -5178,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 1b6571cd43c..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
@@ -1330,9 +1326,11 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1372,15 +1370,44 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
- "name=%s", loc->name, "path=%s", loc->path, NULL);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 811bb55925f..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -140,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -700,10 +701,6 @@ dht_init(xlator_t *this)
pthread_cond_init(&defrag->fc_wakeup_cond, 0);
defrag->global_error = 0;
-
- defrag->is_pure_distribute = _gf_false;
-
- defrag->operate_dist = _gf_true;
}
conf->use_fallocate = 1;
@@ -1049,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 59313639c45..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,7 +595,6 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 9abdcec3f78..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s, %s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes),
- ec_msg_str(fop));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
static int32_t
ec_child_select(ec_fop_data_t *fop)
{
@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
(fop->locks[0].update[EC_DATA_TXN] ||
fop->locks[0].update[EC_METADATA_TXN])) {
if (ec->quorum_count && (num < ec->quorum_count)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, ec->quorum_count, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
return 0;
}
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 89a433e5b91..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2498,7 +2498,7 @@ out:
}
int
-ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
{
int i = 0;
int ret = 0;
@@ -2528,7 +2528,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
xattr[i] = dict;
on[i] = 1;
}
- dirty_xattr[EC_METADATA_TXN] = hton64(1);
ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
(sizeof(*dirty_xattr) * EC_VERSION_SIZE));
if (ret < 0) {
@@ -2629,13 +2628,11 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
"Index entry needs to be purged for: %s ",
uuid_utoa(loc->gfid));
- /* We need to send xattrop to set dirty flag so that it can be
- * healed and index entry could be removed. We need not to take lock
- * on this entry to do so as we are just setting dirty flag which
- * actually increases the trusted.ec.dirty count and does not set
- * the new value.
- * This will make sure that it is not interfering in other fops.*/
- ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
} else if (need_heal == EC_HEAL_NONEED) {
gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
"Heal is not required for : %s ", uuid_utoa(loc->gfid));
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 63fe5d34e38..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + ec->shd.timeout;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -156,15 +156,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
dict_t *xdata = NULL;
+ dict_t *dict = NULL;
uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
* other index entries have appeared to be healed. */
@@ -183,6 +226,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
dict_unref(xdata);
}
+ if (dict) {
+ dict_unref(dict);
+ }
+
return ret;
}
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 90da328e641..de9b89bb2c9 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -626,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
};
struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 66b4e634911..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
}
static int
@@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 0158e8d8546..d45655ef4c3 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1507,8 +1507,8 @@ init(xlator_t *this)
this->private = pvt;
- /* Give some seed value here */
- srand(time(NULL));
+ /* Give some seed value here. */
+ srand(gf_time());
ret = 0;
out:
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 345dbe7e09c..aa00c446e5a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -135,7 +135,7 @@ struct ios_global_stats {
gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
gf_atomic_t fop_hits[GF_FOP_MAXVALUE];
gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE];
- struct timeval started_at;
+ time_t started_at;
struct ios_lat latency[GF_FOP_MAXVALUE];
uint64_t nr_opens;
uint64_t max_nr_opens;
@@ -292,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame)
begin = &frame->begin; \
end = &frame->end; \
\
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
- (end->tv_nsec - begin->tv_nsec)) / \
- 1000; \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
throughput = op_ret / elapsed; \
\
conf = this->private; \
@@ -767,9 +765,8 @@ err:
int
io_stats_dump_global_to_json_logfp(xlator_t *this,
- struct ios_global_stats *stats,
- struct timeval *now, int interval,
- FILE *logfp)
+ struct ios_global_stats *stats, time_t now,
+ int interval, FILE *logfp)
{
int i = 0;
int j = 0;
@@ -795,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
};
dict_t *xattr = NULL;
- interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
- (stats->started_at.tv_sec * 1000000.0 +
- stats->started_at.tv_usec)) /
- 1000000.0;
+ interval_sec = (double)(now - stats->started_at);
conf = this->private;
@@ -950,8 +944,8 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.uptime\": %" PRId64 ",", key_prefix,
- str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix,
+ str_prefix, (uint64_t)(now - stats->started_at));
ios_log(this, logfp,
"\"%s.%s.bytes_read\": "
"%" GF_PRI_ATOMIC ",",
@@ -1203,7 +1197,7 @@ out:
int
io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE *logfp)
+ time_t now, int interval, FILE *logfp)
{
int i = 0;
int per_line = 0;
@@ -1226,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
ios_log(this, logfp, "\n=== Cumulative stats ===");
else
ios_log(this, logfp, "\n=== Interval %d stats ===", interval);
- ios_log(this, logfp, " Duration : %" PRId64 " secs",
- (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, " Duration : %" PRIu64 " secs",
+ (uint64_t)(now - stats->started_at));
ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC,
GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n",
@@ -1372,7 +1366,7 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
int
io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, dict_t *dict)
+ time_t now, int interval, dict_t *dict)
{
int ret = 0;
char key[64] = {0};
@@ -1398,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
interval);
snprintf(key, sizeof(key), "%d-duration", interval);
- sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec);
+ sec = now - stats->started_at;
ret = dict_set_uint64(dict, key, sec);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -1521,9 +1515,8 @@ out:
}
int
-io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval,
- struct ios_dump_args *args)
+io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now,
+ int interval, struct ios_dump_args *args)
{
int ret = -1;
@@ -1581,13 +1574,13 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type,
}
static void
-ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now)
+ios_global_stats_clear(struct ios_global_stats *stats, time_t now)
{
GF_ASSERT(stats);
GF_ASSERT(now);
memset(stats, 0, sizeof(*stats));
- stats->started_at = *now;
+ stats->started_at = now;
}
int
@@ -1598,7 +1591,7 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
struct ios_global_stats cumulative = {};
struct ios_global_stats incremental = {};
int increment = 0;
- struct timeval now;
+ time_t now = 0;
GF_ASSERT(this);
GF_ASSERT(args);
@@ -1606,8 +1599,8 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX);
conf = this->private;
+ now = gf_time();
- gettimeofday(&now, NULL);
LOCK(&conf->lock);
{
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
@@ -1620,17 +1613,17 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
if (!is_peek) {
increment = conf->increment++;
- ios_global_stats_clear(&conf->incremental, &now);
+ ios_global_stats_clear(&conf->incremental, now);
}
}
}
UNLOCK(&conf->lock);
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
- io_stats_dump_global(this, &cumulative, &now, -1, args);
+ io_stats_dump_global(this, &cumulative, now, -1, args);
if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL)
- io_stats_dump_global(this, &incremental, &now, increment, args);
+ io_stats_dump_global(this, &incremental, now, increment, args);
return 0;
}
@@ -1640,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
{
struct ios_conf *conf = NULL;
struct timeval now;
- uint64_t sec = 0;
- uint64_t usec = 0;
int i = 0;
+ double usecs = 0;
uint64_t data_read = 0;
uint64_t data_written = 0;
uint64_t block_count_read = 0;
@@ -1657,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
return 0;
gettimeofday(&now, NULL);
-
- if (iosfd->opened_at.tv_usec > now.tv_usec) {
- now.tv_usec += 1000000;
- now.tv_usec--;
- }
-
- sec = now.tv_sec - iosfd->opened_at.tv_sec;
- usec = now.tv_usec - iosfd->opened_at.tv_usec;
+ usecs = gf_tvdiff(&iosfd->opened_at, &now);
gf_log(this->name, GF_LOG_INFO, "--- fd stats ---");
if (iosfd->filename)
gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename);
- if (sec)
- gf_log(this->name, GF_LOG_INFO,
- " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec);
+ if (usecs)
+ gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs);
data_read = GF_ATOMIC_GET(iosfd->data_read);
if (data_read)
@@ -1776,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
begin = &frame->begin;
end = &frame->end;
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec)) /
- 1000;
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
update_ios_latency_stats(&conf->cumulative, elapsed, op);
update_ios_latency_stats(&conf->incremental, elapsed, op);
@@ -3592,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf)
return;
}
-static int
+static void
io_stats_clear(struct ios_conf *conf)
{
- struct timeval now;
- int ret = -1;
+ time_t now = 0;
GF_ASSERT(conf);
+ now = gf_time();
- if (!gettimeofday(&now, NULL)) {
- LOCK(&conf->lock);
- {
- ios_global_stats_clear(&conf->cumulative, &now);
- ios_global_stats_clear(&conf->incremental, &now);
- conf->increment = 0;
- }
- UNLOCK(&conf->lock);
- ret = 0;
+ LOCK(&conf->lock);
+ {
+ ios_global_stats_clear(&conf->cumulative, now);
+ ios_global_stats_clear(&conf->incremental, now);
+ conf->increment = 0;
}
-
- return ret;
+ UNLOCK(&conf->lock);
}
int32_t
@@ -3852,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf)
_ios_destroy_dump_thread(conf);
ios_destroy_sample_buf(conf->ios_sample_buf);
LOCK_DESTROY(&conf->lock);
- GF_FREE(conf->dnscache);
+ gf_dnscache_deinit(conf->dnscache);
GF_FREE(conf);
}
@@ -3875,7 +3852,7 @@ ios_init_stats(struct ios_global_stats *stats)
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++)
GF_ATOMIC_INIT(stats->upcall_hits[i], 0);
- gettimeofday(&stats->started_at, NULL);
+ stats->started_at = gf_time();
}
int
@@ -3964,11 +3941,14 @@ init(xlator_t *this)
gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- ret = -1;
GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
out);
conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec);
+ if (!conf->dnscache) {
+ ret = -1;
+ goto out;
+ }
GF_OPTION_INIT("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
@@ -4119,12 +4099,9 @@ notify(xlator_t *this, int32_t event, void *data, ...)
}
if (GF_IOS_INFO_CLEAR == op) {
- ret = io_stats_clear(this->private);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to clear info stats");
+ io_stats_clear(this->private);
- ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1);
+ ret = dict_set_int32(output, "stats-cleared", 1);
if (ret)
gf_log(this->name, GF_LOG_ERROR,
"Failed to set stats-cleared"
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 194634b003d..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC
CLOUDSYNC_DIR = cloudsync
endif
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
compress changelog gfid-access snapview-client snapview-server trash \
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
- utime
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
index 34e20f9df11..5cef2ffa5e5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
}
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv)
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
{
if (!scrub_stat)
return;
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_start_time = time;
}
pthread_mutex_unlock(&scrub_stat->lock);
}
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv)
+ time_t time)
{
int lst_size = 0;
@@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_end_time = time;
- scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec -
- scrub_stat->scrub_start_tv.tv_sec;
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
index 24128b90a66..f022aa831eb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -15,20 +15,22 @@
#include <sys/time.h>
#include <pthread.h>
+#include <glusterfs/common-utils.h>
+
struct br_scrub_stats {
- uint64_t scrubbed_files; /* Total number of scrubbed file */
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
- uint64_t unsigned_files; /* Total number of unsigned file */
+ uint64_t unsigned_files; /* Total number of unsigned files. */
- uint64_t scrub_duration; /* Duration of last scrub */
+ uint64_t scrub_duration; /* Duration of last scrub. */
- char last_scrub_time[1024]; /*last scrub completion time */
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
- struct timeval scrub_start_tv; /* Scrubbing starting time*/
+ time_t scrub_start_time; /* Scrubbing starting time. */
- struct timeval scrub_end_tv; /* Scrubbing finishing time */
+ time_t scrub_end_time; /* Scrubbing finishing time. */
- int8_t scrub_running; /* Scrub running or not */
+ int8_t scrub_running; /* Whether scrub running or not. */
pthread_mutex_t lock;
};
@@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
void
br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv);
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv);
+ time_t time);
#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index c4607654365..289dd53f610 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -604,22 +604,20 @@ br_scrubber_log_time(xlator_t *this, const char *sfx)
char timestr[GF_TIMESTR_SIZE] = {
0,
};
- struct timeval tv = {
- 0,
- };
br_private_t *priv = NULL;
+ time_t now = 0;
+ now = gf_time();
priv = this->private;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
- br_update_scrub_start_time(&priv->scrub_stat, &tv);
+ br_update_scrub_start_time(&priv->scrub_stat, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
"Scrubbing %s at %s", sfx, timestr);
} else {
- br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv);
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
"Scrubbing %s at %s", sfx, timestr);
}
@@ -631,12 +629,10 @@ br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
char timestr[GF_TIMESTR_SIZE] = {
0,
};
- struct timeval tv = {
- 0,
- };
+ time_t now = 0;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
@@ -919,9 +915,6 @@ br_fsscan_schedule(xlator_t *this)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
- struct timeval tv = {
- 0,
- };
char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&tv, NULL);
- scrub_monitor->boot = tv.tv_sec;
+ scrub_monitor->boot = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
@@ -978,9 +970,7 @@ br_fsscan_activate(xlator_t *this)
char timestr[GF_TIMESTR_SIZE] = {
0,
};
- struct timeval now = {
- 0,
- };
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this)
}
pthread_mutex_unlock(&scrub_monitor->donelock);
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
_br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
@@ -1023,9 +1013,7 @@ br_fsscan_reschedule(xlator_t *this)
char timestr[GF_TIMESTR_SIZE] = {
0,
};
- struct timeval now = {
- 0,
- };
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this)
if (!fsscrub->frequency_reconf)
return 0;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this)
return -1;
}
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1076,20 +1064,16 @@ br_fsscan_ondemand(xlator_t *this)
char timestr[GF_TIMESTR_SIZE] = {
0,
};
- struct timeval now = {
- 0,
- };
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_monitor *scrub_monitor = NULL;
priv = this->private;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
-
+ now = gf_time();
timo = BR_SCRUB_ONDEMAND;
-
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 8d2b7f051da..6c15a166f18 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
- BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED);
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
@@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
"daemon. Unwinding the fop"
#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
"failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 605a5e4c3e4..447dd47ff41 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
priv = this->private;
br_set_ongoingversion(obuf, oversion, priv->boot);
- return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
- sizeof(br_version_t));
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
}
static int
@@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
br_set_signature(sbuf, sign, signaturelen, &size);
- return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
- size);
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
}
/**
@@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
op_errno = ENOMEM;
dict = dict_new();
if (!dict)
- goto done;
+ goto out;
ret = br_stub_alloc_versions(&obuf, NULL, 0);
- if (ret)
- goto dealloc_dict;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
ret = br_stub_fd_versioning(
this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
-
-dealloc_versions:
- br_stub_dealloc_versions(obuf);
-dealloc_dict:
- dict_unref(dict);
-done:
+out:
+ if (dict)
+ dict_unref(dict);
if (ret) {
if (local)
frame->local = NULL;
@@ -1025,31 +1028,36 @@ static int
br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
br_isignature_t *sign, int *fakesuccess)
{
- int32_t ret = 0;
+ int32_t ret = -1;
size_t signaturelen = 0;
br_signature_t *sbuf = NULL;
if (!br_is_signature_type_valid(sign->signaturetype))
- goto error_return;
+ goto out;
signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
- if (ret)
- goto error_return;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+ /* At this point sbuf has been added to dict, so the memory will be freed
+ * when the data from the dict is destroyed
+ */
ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
- if (ret)
- goto dealloc_versions;
-
- return 0;
-
-dealloc_versions:
- br_stub_dealloc_versions(sbuf);
-error_return:
- return -1;
+out:
+ return ret;
}
static void
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
index 71fe1f032a0..e561997d858 100644
--- a/xlators/features/changelog/src/changelog-helpers.c
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len)
}
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer)
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
{
char changelog_path[PATH_MAX + 1] = {
0,
@@ -273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
goto out;
}
- len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
priv->rollover_count);
if (len >= sizeof(x_value)) {
ret = -1;
@@ -382,8 +381,7 @@ out:
}
static int
-changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts)
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ret = -1;
int notify = 0;
@@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
priv->changelog_fd = -1;
}
- time_t time = (time_t)ts;
-
- /* Get GMT time */
- gmt = gmtime(&time);
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
(void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
priv->changelog_dir);
- (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu",
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
priv->changelog_dir, yyyymmdd, ts);
(void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
@@ -593,7 +589,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -723,7 +719,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
int32_t len = 0;
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
- "name=%lu", ts, NULL);
+ "name=%ld", ts, NULL);
CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
/* get the htime file name in ht_file_path */
- len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
HTIME_FILE_NAME, ts);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
goto out;
}
- (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
HTIME_FILE_NAME, ts);
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
@@ -963,8 +959,8 @@ out:
}
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale)
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
{
int ret = -1;
@@ -985,21 +981,12 @@ changelog_entry_length()
return sizeof(changelog_log_data_t);
}
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
{
- struct timeval tv = {
- 0,
- };
-
cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
-
- if (gettimeofday(&tv, NULL))
- return -1;
-
- cld->cld_roll_time = (unsigned long)tv.tv_sec;
+ cld->cld_roll_time = gf_time();
cld->cld_finale = is_last;
- return 0;
}
int
@@ -1274,7 +1261,7 @@ changelog_rollover(void *data)
while (1) {
(void)pthread_testcancel();
- tv.tv_sec = time(NULL) + priv->rollover_time;
+ tv.tv_sec = gf_time() + priv->rollover_time;
tv.tv_nsec = 0;
ret = 0; /* Reset ret to zero */
@@ -1355,12 +1342,7 @@ changelog_rollover(void *data)
if (priv->explicit_rollover == _gf_true)
sleep(1);
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL);
- continue;
- }
+ changelog_fill_rollover_data(&cld, _gf_false);
_mask_cancellation();
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
index 0d06d98c9e1..38fa7590c32 100644
--- a/xlators/features/changelog/src/changelog-helpers.h
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -31,7 +31,7 @@
*/
typedef struct changelog_log_data {
/* rollover related */
- unsigned long cld_roll_time;
+ time_t cld_roll_time;
/* reopen changelog? */
gf_boolean_t cld_finale;
@@ -97,12 +97,6 @@ struct changelog_encoder {
typedef struct changelog_time_slice {
/**
- * just in case we need nanosecond granularity some day.
- * field is unused as of now (maybe we'd need it later).
- */
- struct timeval tv_start;
-
- /**
* version of changelog file, incremented each time changes
* rollover.
*/
@@ -423,11 +417,11 @@ changelog_local_t *
changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
int xtra_records, gf_boolean_t update_flag);
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale);
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
int
changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
int
changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
@@ -451,12 +445,11 @@ changelog_fsync_thread(void *data);
int
changelog_forget(xlator_t *this, inode_t *inode);
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer);
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
/* Geo-Rep snapshot dependency changes */
void
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
index 4dd56b8ee97..cb0e16c85d8 100644
--- a/xlators/features/changelog/src/changelog-messages.h
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -59,12 +59,12 @@ GLFS_MSGID(
CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
- CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
- CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
@@ -123,8 +123,6 @@ GLFS_MSGID(
#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
-#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \
- "failed to fill rollover data"
#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
"changelog local initialization failed"
@@ -144,9 +142,7 @@ GLFS_MSGID(
#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
"Something went wrong in dict_get_str_boolean"
-#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure"
#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
-#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime"
#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
"translator needs a single subvolume"
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 9a7d158cbf2..6a6e5af859e 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -2252,23 +2252,11 @@ static int
changelog_init(xlator_t *this, changelog_priv_t *priv)
{
int i = 0;
- int ret = -1;
- struct timeval tv = {
- 0,
- };
+ int ret = 0;
changelog_log_data_t cld = {
0,
};
- ret = gettimeofday(&tv, NULL);
- if (ret) {
- gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE,
- NULL);
- goto out;
- }
-
- priv->slice.tv_start = tv;
-
priv->maps[CHANGELOG_TYPE_DATA] = "D ";
priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
@@ -2287,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv)
* in case there was an encoding change. so... things are kept
* simple here.
*/
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, _gf_false);
ret = htime_open(this, priv, cld.cld_roll_time);
/* call htime open with cld's rollover_time */
@@ -2470,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options)
char csnap_dir[PATH_MAX] = {
0,
};
- struct timeval tv = {
- 0,
- };
uint32_t timeout = 0;
priv = this->private;
@@ -2564,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
if (active_now || active_earlier) {
- ret = changelog_fill_rollover_data(&cld, !active_now);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, !active_now);
slice = &priv->slice;
@@ -2585,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options)
if (!active_earlier) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
NULL);
- if (gettimeofday(&tv, NULL)) {
- gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL);
- ret = -1;
- goto out;
- }
- htime_create(this, priv, tv.tv_sec);
+ htime_create(this, priv, gf_time());
}
ret = changelog_spawn_helper_threads(this, priv);
}
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
index 7680260988b..23c3599825a 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
int date_len = -1;
int res_len = -1;
- ctime = time(NULL);
+ ctime = gf_time();
gtime = gmtime(&ctime);
date_len = strftime(httpdate, sizeof(httpdate),
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 4ece7ff6fc8..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
worker_enqueue(this, stub);
return 0;
normal:
- ret = dict_get_str(xattr_req, "link-count", &flag);
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...)
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait for draining stub from queue before notify PARENT_DOWN */
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
index 67fdd53cee2..56dee244281 100644
--- a/xlators/features/leases/src/leases-internal.c
+++ b/xlators/features/leases/src/leases-internal.c
@@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
priv = this->private;
- recall_time = time(NULL);
+ recall_time = gf_time();
list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
lease_id_list)
{
@@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data)
gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
while (1) {
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&priv->mutex);
{
if (priv->fini) {
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 1fbbae3541f..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -605,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index d5babcc325c..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
@@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -544,14 +542,10 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
if (nonblock)
goto out;
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
@@ -612,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index d4a24eb44be..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -397,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
if (can_block == 0) {
goto out;
}
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -466,7 +460,7 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
return 0;
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 93c2973b2b3..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -85,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -139,9 +138,9 @@ struct __entry_lock {
const char *basename;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index d781cde8969..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
char *save_ptr = NULL;
tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
strtok_r(tmp_key, ":", &save_ptr);
if (!*save_ptr) {
if (tmp_key)
@@ -3684,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3695,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3712,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3764,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3778,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3812,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->client_uid, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/metadisp/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char gfid_buf[GFID_STR_LEN + 1] = {
+ 0,
+ };
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+ GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+ loc_copy(dst_loc, src_loc);
+ memcpy(dst_loc->pargfid, root, sizeof(root));
+ GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke
+ // whatever loc_copy gave us
+
+ uuid_utoa_r(gfid, gfid_buf);
+
+ path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+ gf_common_mt_char); // freed via loc_wipe
+
+ path[0] = '/';
+ strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+ path[GFID_STR_LEN] = 0;
+ dst_loc->path = path;
+ if (src_loc->name)
+ dst_loc->name = strrchr(dst_loc->path, '/');
+ if (dst_loc->name)
+ dst_loc->name++;
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+ # "readdir, # special-cased
+ # "readdirp, # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ // create the backend data inode
+ STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = cookie;
+ if (op_ret != 0) {
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+ }
+
+ if (stub == NULL) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+
+ loc_t backend_loc = {
+ 0,
+ };
+ call_stub_t *stub = NULL;
+ uuid_t *gfid_req = NULL;
+
+ RESOLVE_GFID_REQ(xdata, gfid_req, out);
+
+ if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ frame->local = loc;
+
+ stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+ mode, umask, fd, xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline of fops.c */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ METADISP_TRACE("backend_lookup_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("backend_lookup_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+
+ // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ METADISP_TRACE("lookup");
+ call_stub_t *stub = NULL;
+ stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!stub) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+ xdata);
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ /*
+ * Always use readdirp, even if the original was readdir. Why? Because NFS.
+ * There are multiple translations between Gluster, UNIX, and NFS stat
+ * structures in that path. One of them uses the type etc. from the stat
+ * structure, which is only filled in by readdirp. If we use readdir, the
+ * entries do actually go all the way back to the client and are visible in
+ * getdents, but then the readdir throws them away because of the
+ * uninitialized type.
+ */
+ GF_UNUSED int32_t ret;
+ if (!xdata) {
+ xdata = dict_new();
+ }
+
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ GF_UNUSED int32_t ret;
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+ xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(statpost->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ METADISP_TRACE("setattr");
+ call_stub_t *stub = NULL;
+ stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+ valid, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * do ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+ if (op_errno == ENOENT) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL);
+ return 0;
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("winding stat to path %s", loc->path);
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("bad object, sending EUCLEAN");
+ STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // only use the stub for the files
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ loc_t backend_loc = {
+ 0,
+ };
+ METADISP_FILTER_ROOT(stat, loc, xdata);
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+ // if we've just been sent a stat from posix, then we know
+ // that we must send down a stat for a file to the second child.
+ //
+ // that means we can skip the stat for the first child and just
+ // send to the data disk.
+ METADISP_TRACE("got syncop-internal-from-posix");
+ STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+ return 0;
+ }
+
+ // we do not know if the request is for a file, folder, etc. wind
+ // to first child to find out.
+ stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+ METADISP_TRACE("winding stat to first child %s", loc->path);
+ STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * do ensure that UNLINK causes both the metadata objects
+ * to get removed and the data objects to get removed.
+ */
+
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ METADISP_TRACE("winding backend unlink to path %s", loc->path);
+ STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+ int ret = 0;
+ call_stub_t *stub = NULL;
+ int nlink = 0;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto unwind;
+ }
+ METADISP_TRACE("frontend hardlink count %d %d", ret, nlink);
+ if (nlink > 1) {
+ goto unwind;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // fail fast on empty gfid so we don't loop forever
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto unwind;
+ }
+
+ // fill gfid since the stub is incomplete
+ memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+ // loop back to ourselves after a lookup
+ stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+ METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ //
+ // ensure we get the link count on the unlink response, so we can
+ // account for hardlinks before winding to the backend.
+ // NOTE:
+ // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+ // is needed to ensure that multiple requests will work in the same
+ // xlator stack.
+ //
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+ METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+ stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+ xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with children. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/* defined in fops.c */
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .op_version = {1},
+ .identifier = "metadisp",
+ .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+#define METADISP_FILTER_ROOT(_op, _args...) \
+ if (strcmp(loc->path, "/") == 0) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \
+ if (__is_root_gfid(_gfid)) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \
+ VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \
+ _lbl)
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 73c008a2c00..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* loop of validation and checking
* limit when timeout is zero.
*/
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -644,27 +640,10 @@ unwind:
return 0;
}
-static uint64_t
-quota_time_elapsed(struct timeval *now, struct timeval *then)
-{
- return (now->tv_sec - then->tv_sec);
-}
-
-int32_t
-quota_timeout(struct timeval *tv, int32_t timeout)
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
{
- struct timeval now = {
- 0,
- };
- int32_t timed_out = 0;
-
- gettimeofday(&now, NULL);
-
- if (quota_time_elapsed(&now, tv) >= timeout) {
- timed_out = 1;
- }
-
- return timed_out;
+ return (gf_time() - t) >= timeout;
}
/* Return: 1 if new entry added
@@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if ((object_aggr_count) > ctx->object_hard_lim) {
hard_limit_exceeded = 1;
@@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if (wouldbe_size >= ctx->hard_lim) {
hard_limit_exceeded = 1;
@@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
}
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -4873,7 +4848,7 @@ off:
void
quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
- char **path, struct timeval *cur_time)
+ char **path, time_t *cur_time)
{
xlator_t *this = THIS;
@@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
if (!(*path))
*path = uuid_utoa(inode->gfid);
- gettimeofday(cur_time, NULL);
+ *cur_time = gf_time();
}
/* Logs if
@@ -4903,9 +4878,7 @@ void
quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta)
{
- struct timeval cur_time = {
- 0,
- };
+ time_t cur_time = 0;
char *usage_str = NULL;
char *path = NULL;
int64_t cur_size = 0;
@@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
/* Usage is above soft limit */
else if (cur_size > ctx->soft_lim &&
- quota_timeout(&ctx->prev_log, priv->log_timeout)) {
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
@@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
if (path)
@@ -5184,9 +5157,9 @@ quota_priv_dump(xlator_t *this)
if (ret)
goto out;
else {
- gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
- gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
- gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 8a3dc7a77f5..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -153,8 +153,8 @@ struct quota_inode_ctx {
int64_t object_soft_lim;
struct iatt buf;
struct list_head parents;
- struct timeval tv;
- struct timeval prev_log;
+ time_t validate_time;
+ time_t prev_log_time;
gf_boolean_t ancestry_built;
gf_lock_t lock;
};
@@ -199,6 +199,7 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
+ /* FIXME: consider time_t for timeouts. */
uint32_t soft_timeout;
uint32_t hard_timeout;
uint32_t log_timeout;
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
index 25fbd4aa748..df45f2a940b 100644
--- a/xlators/features/read-only/src/worm-helper.c
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
GF_VALIDATE_OR_GOTO("worm", this, out);
GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
- start_time = time(NULL);
+ start_time = gf_time();
dict = dict_new();
if (!dict) {
gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
@@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
if (ret)
goto out;
stbuf->ia_mtime = stpre.ia_mtime;
- stbuf->ia_atime = time(NULL) + retention_state->ret_period;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
if (fop_with_fd)
ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
@@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
{
int op_errno = EROFS;
int ret = -1;
+ time_t now = 0;
uint64_t com_period = 0;
uint64_t start_time = 0;
dict_t *dict = NULL;
@@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
goto out;
}
- if (ret == -1 && (time(NULL) - start_time) >= com_period) {
- if ((time(NULL) - stbuf.ia_mtime) >= com_period) {
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
&stbuf);
if (ret) {
@@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
op_errno = 0;
goto out;
}
- } else if (ret == -1 && (time(NULL) - start_time) < com_period) {
+ } else if (ret == -1 && (now - start_time) < com_period) {
op_errno = 0;
goto out;
- } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) {
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
}
if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index ff928d0df61..e5f93063943 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1004,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1020,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exists
+ * and directly issue mknod to crete shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
@@ -2443,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2463,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2473,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -5656,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index a28aa1e09e0..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -216,7 +216,7 @@ append_time_stamp(char *name, size_t name_size)
0,
};
- gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS);
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
/* removing white spaces in timestamp */
for (i = 0; i < strlen(timestr); i++) {
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
index 978825f6b56..c641bd6f432 100644
--- a/xlators/features/upcall/src/upcall-internal.c
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -316,7 +316,7 @@ upcall_reaper_thread(void *data)
priv = this->private;
GF_ASSERT(priv);
- time_now = time(NULL);
+ time_now = gf_time();
while (!priv->fini) {
list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
inode_ctx_list)
@@ -344,7 +344,7 @@ upcall_reaper_thread(void *data)
/* don't do a very busy loop */
timeout = get_cache_invalidation_timeout(this);
sleep(timeout / 2);
- time_now = time(NULL);
+ time_now = gf_time();
}
return NULL;
@@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
goto out;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
@@ -670,13 +670,13 @@ upcall_cache_forget(xlator_t *this, inode_t *inode,
return;
}
- time_now = time(NULL);
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
&up_inode_ctx->client_list, client_list)
{
- /* Set the access time to time(NULL)
+ /* Set the access time to gf_time()
* to send notify */
up_client_entry->access_time = time_now;
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index eaa61c435e5..685beb42d27 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -25,13 +25,14 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \
- glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \
+ $(CONTRIBDIR)/mount/mntent.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/libglusterd/src/libglusterd.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL)
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD)
noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
@@ -46,7 +47,8 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
glusterd-shd-svc.h glusterd-shd-svc-helper.h \
glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
- $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 6d1a1e98848..e56cd0e6c74 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1359,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
"Unable to find volume: %s", volname);
goto out;
}
@@ -1378,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (ret) {
- gf_msg_debug(THIS->name, 0, "Unable to get replica count");
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (ret) {
- gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict");
+ gf_msg_debug(this->name, 0, "Unable to get replica count");
}
if (replica_count > 0) {
@@ -1400,18 +1394,18 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
glusterd_add_peers_to_auth_list(volname);
- if (glusterd_is_volume_replicate(volinfo)) {
+ if (replica_count && glusterd_is_volume_replicate(volinfo)) {
/* Do not allow add-brick for stopped volumes when replica-count
* is being increased.
*/
- if (conf->op_version >= GD_OP_VERSION_3_7_10 && replica_count &&
- GLUSTERD_STATUS_STOPPED == volinfo->status) {
+ if (GLUSTERD_STATUS_STOPPED == volinfo->status &&
+ conf->op_version >= GD_OP_VERSION_3_7_10) {
ret = -1;
snprintf(msg, sizeof(msg),
" Volume must not be in"
" stopped state when replica-count needs to "
" be increased.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1419,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* op-version check for replica 2 to arbiter conversion. If we
* don't have this check, an older peer added as arbiter brick
* will not have the arbiter xlator in its volfile. */
- if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) &&
- (replica_count == 3)) {
- ret = -1;
- snprintf(msg, sizeof(msg),
- "Cluster op-version must "
- "be >= 30800 to add arbiter brick to a "
- "replica 2 volume.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
- msg);
- *op_errstr = gf_strdup(msg);
- goto out;
+ if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) {
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "No arbiter count present in the dict");
+ } else if (arbiter_count == 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
}
/* Do not allow increasing replica count for arbiter volumes. */
- if (replica_count && volinfo->arbiter_count) {
+ if (volinfo->arbiter_count) {
ret = -1;
snprintf(msg, sizeof(msg),
"Increasing replica count "
"for arbiter volumes is not supported.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1451,7 +1451,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
* doing this check at the originator node is sufficient.
*/
- if (is_origin_glusterd(dict) && !is_force) {
+ if (!is_force && is_origin_glusterd(dict)) {
ret = 0;
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
gf_msg_debug(this->name, 0,
@@ -1459,15 +1459,18 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"found. Checking brick order.");
if (replica_count)
ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
replica_count);
else
ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
volinfo->replica_count);
} else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
gf_msg_debug(this->name, 0,
"Disperse cluster type"
" found. Checking brick order.");
- ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname,
+ &bricks, &count,
volinfo->disperse_count);
}
if (ret) {
@@ -1496,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (len < 0) {
strcpy(msg, "<error>");
}
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1528,7 +1531,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"Volume name %s rebalance is in "
"progress. Please retry after completion",
volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1546,18 +1549,22 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
msg[0] = '\0';
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
- goto out;
+ if (!count) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
+ }
}
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
- goto out;
+ if (!bricks) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
}
if (bricks) {
@@ -1576,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"brick path %s is "
"too long",
brick);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
msg);
*op_errstr = gf_strdup(msg);
@@ -1587,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
NULL);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
"Add-brick: Unable"
" to get brickinfo");
goto out;
@@ -1657,7 +1664,7 @@ out:
GF_FREE(str_ret);
GF_FREE(all_bricks);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -2227,6 +2234,42 @@ out:
}
int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
+int
glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict)
{
int ret = -1;
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
index 2d60daf180a..f08bd6cebee 100644
--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -421,6 +421,35 @@ check_host_list(void)
}
int
+gd_ganesha_send_dbus(char *volname, char *value)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+ runinit(&runner);
+
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out);
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out);
+
+ ret = 0;
+ if (check_host_list()) {
+ /* Check whether ganesha is running on this node */
+ if (manage_service("status")) {
+ gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0,
+ GD_MSG_GANESHA_NOT_RUNNING,
+ "Export failed, NFS-Ganesha is not running");
+ } else {
+ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+ value, volname, NULL);
+ ret = runner_run(&runner);
+ }
+ }
+out:
+ return ret;
+}
+
+int
manage_export_config(char *volname, char *value, char **op_errstr)
{
runner_t runner = {
@@ -447,9 +476,6 @@ int
ganesha_manage_export(dict_t *dict, char *value,
gf_boolean_t update_cache_invalidation, char **op_errstr)
{
- runner_t runner = {
- 0,
- };
int ret = -1;
glusterd_volinfo_t *volinfo = NULL;
dict_t *vol_opts = NULL;
@@ -458,7 +484,6 @@ ganesha_manage_export(dict_t *dict, char *value,
glusterd_conf_t *priv = NULL;
gf_boolean_t option = _gf_false;
- runinit(&runner);
this = THIS;
GF_ASSERT(this);
priv = this->private;
@@ -538,26 +563,13 @@ ganesha_manage_export(dict_t *dict, char *value,
goto out;
}
}
-
- if (check_host_list()) {
- /* Check whether ganesha is running on this node */
- if (manage_service("status")) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING,
- "Export failed, NFS-Ganesha is not running");
- } else {
- runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
- value, volname, NULL);
- ret = runner_run(&runner);
- if (ret) {
- gf_asprintf(op_errstr,
- "Dynamic export"
- " addition/deletion failed."
- " Please see log file for details");
- goto out;
- }
- }
+ ret = gd_ganesha_send_dbus(volname, value);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details");
+ goto out;
}
-
if (update_cache_invalidation) {
vol_opts = volinfo->dict;
ret = dict_set_dynstr_with_alloc(vol_opts,
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
index b01fd4da24b..a0bfea41f0f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
@@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s",
svc->proc.logdir, svc->proc.logfile);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 7d488ffd87a..1b21c40596d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5593,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str);
if (ret) {
- now = time(NULL);
+ now = gf_time();
strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S",
localtime(&now));
gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp);
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
index d18eb6b2f5e..61c0f1c946f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -206,11 +206,13 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
int i = 0;
int count = 0;
int ret = -1;
+ int flag = 0;
char query[1024] = {
0,
};
char *key = NULL;
char *value = NULL;
+ char *inet_family = NULL;
xlator_t *this = NULL;
this = THIS;
GF_ASSERT(this);
@@ -243,9 +245,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
continue;
runner_argprintf(runner, "%s=%s", key, value);
+ if ((strncmp(key, "cluster.enable-shared-storage",
+ SLEN("cluster.enable-shared-storage")) == 0 ||
+ strncmp(key, "enable-shared-storage",
+ SLEN("enable-shared-storage")) == 0) &&
+ strncmp(value, "enable", SLEN("enable")) == 0)
+ flag = 1;
}
glusterd_hooks_add_custom_args(dict, runner);
+ if (flag == 1) {
+ ret = dict_get_str_sizen(this->options, "transport.address-family",
+ &inet_family);
+ if (!ret) {
+ runner_argprintf(runner, "transport.address-family=%s",
+ inet_family);
+ }
+ }
ret = 0;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
index a800d9543cf..34abf35cb00 100644
--- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -76,7 +76,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req)
"for volume %s",
volname);
- ret = dict_set_uint64(dict, "rotate-key", (uint64_t)time(NULL));
+ ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time());
if (ret)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index 17052cee263..d7257e1a7b5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -27,6 +27,7 @@ typedef enum gf_gld_mem_types_ {
gf_gld_mt_mop_stage_req_t,
gf_gld_mt_probe_ctx_t,
gf_gld_mt_glusterd_volinfo_t,
+ gf_gld_mt_volinfo_dict_data_t,
gf_gld_mt_glusterd_brickinfo_t,
gf_gld_mt_peer_hostname_t,
gf_gld_mt_defrag_info,
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index c0891797fdf..3a1e600fb03 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -319,7 +319,8 @@ GLFS_MSGID(
GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED,
GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED,
GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL,
- GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL);
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL,
+ GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL);
#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry"
#define GD_MSG_INVALID_ARGUMENT_STR \
@@ -447,4 +448,4 @@ GLFS_MSGID(
"Failed to allocate memory or get serialized length of dict"
#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute"
-#endif /* !_GLUSTERD_MESSAGES_H_ */ \ No newline at end of file
+#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
index c170827eec0..1069688a89d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -626,6 +626,136 @@ out:
}
static int
+glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ uint32_t op_errno, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_commit_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode post commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "post commit failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send post commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
int32_t status, char *op_errstr,
dict_t *rsp_dict)
@@ -963,6 +1093,12 @@ glusterd_handle_commit(rpcsvc_request_t *req)
}
static int
+glusterd_handle_post_commit(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn);
+}
+
+static int
glusterd_handle_post_validate(rpcsvc_request_t *req)
{
return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn);
@@ -986,6 +1122,9 @@ static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0},
[GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL,
GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT",
+ glusterd_handle_post_commit, NULL,
+ GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0},
[GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL",
glusterd_handle_post_validate, NULL,
GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA,
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index b2128e10a04..bca7221062b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -86,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
peer_str, err_string);
break;
}
+ case GLUSTERD_MGMT_V3_POST_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
case GLUSTERD_MGMT_V3_POST_VALIDATE: {
snprintf(op_err, sizeof(op_err),
"Post Validation failed on %s. %s", peer_str,
@@ -405,6 +410,47 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_post_commit_add_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Add-brick post commit failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_post_commit_replace_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Replace-brick post commit failed.");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict)
{
@@ -1720,6 +1766,274 @@ out:
}
int32_t
+gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_post_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Post commit on local node */
+ ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Post commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending post commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+ if (!peerinfo->connected)
+ continue;
+
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent post commit req for %s to %d "
+ "peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ glusterd_op_modify_op_ctx(op, op_ctx);
+ return ret;
+}
+
+int32_t
gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -2408,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
+ /* POST COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit Op Failed");
+ goto out;
+ }
+
/* POST-COMMIT VALIDATE PHASE */
/* As of now, post_validate is not trying to cleanup any failed
commands. So as of now, I am sending 0 (op_ret as 0).
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
index 71f793d0397..27dd1849519 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -28,6 +28,10 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
uint32_t *op_errno, dict_t *rsp_dict);
int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+
+int32_t
gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict);
@@ -84,4 +88,10 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
dict_t *rsp_dict);
int
glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict);
+
+int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr);
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr);
#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 2afd0fe1b74..458bf168ede 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -219,6 +219,9 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
char valgrind_logfile[PATH_MAX] = {
0,
};
+ char msg[1024] = {
+ 0,
+ };
char *volfileserver = NULL;
char *localtime_logging = NULL;
@@ -270,12 +273,17 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
"rebalance");
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log",
priv->logdir, volinfo->volname);
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -316,6 +324,10 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
runner_add_arg(&runner, "--localtime-logging");
}
+ snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
ret = runner_run_nowait(&runner);
if (ret) {
gf_msg_debug("glusterd", 0, "rebalance command failed");
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
index 1f3f4909cbb..d75f249b29e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
@@ -304,7 +304,7 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log",
svc->proc.logdir);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -313,8 +313,13 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags)
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index d96d5dd2cfc..995268b796d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -2037,8 +2037,9 @@ glusterd_update_snaps_synctask(void *opaque)
"Failed to remove snap %s", snap->snapname);
goto out;
}
- if (dict)
- dict_unref(dict);
+
+ dict_unref(dict);
+ dict = NULL;
}
snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
ret = dict_get_int32(peer_data, buf, &val);
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index c2428dc0de0..aeaa8d15214 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -3930,7 +3930,8 @@ glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
- ret = dict_set_int64(dict, "snap-time", (int64_t)time(&snap_time));
+ snap_time = gf_time();
+ ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
"Unable to set snap-time");
@@ -5322,6 +5323,48 @@ glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier"));
gd_update_volume_op_versions(snap_vol);
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_vol->dict,
+ "features.cache-invalidation", "on");
+ ret = gd_ganesha_send_dbus(clonename, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ goto out;
+ }
+ }
+ if (!glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ /* This happens when a snapshot was created when Ganesha was
+ * enabled globally. Then Ganesha disabled from the cluster.
+ * In such cases, we will have the volume level option set
+ * on dict, So we have to disable it as it doesn't make sense
+ * to keep the option.
+ */
+
+ ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off");
+ if (ret)
+ goto out;
+ }
+
ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
@@ -5393,8 +5436,31 @@ out:
for (i = 0; unsupported_opt[i].key; i++)
GF_FREE(unsupported_opt[i].value);
- if (snap_vol)
+ if (snap_vol) {
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ }
+ }
+
+ ret = gd_ganesha_send_dbus(clonename, "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ }
+ }
+
glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true);
+ }
snap_vol = NULL;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 465e41ef00b..d94dceb10b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str)
while (ptr) {
*ptr = '-';
- ptr = strchr(str, '/');
+ ptr = strchr(ptr, '/');
}
}
@@ -660,85 +660,72 @@ out:
}
static int
-_storeslaves(dict_t *this, char *key, data_t *value, void *data)
-{
- int32_t ret = 0;
- gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
-
- xl = THIS;
- GF_ASSERT(xl);
-
- shandle = (gf_store_handle_t *)data;
-
- GF_ASSERT(shandle);
- GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
- GF_ASSERT(key);
- GF_ASSERT(value);
- GF_ASSERT(value->data);
-
- gf_msg_debug(xl->name, 0, "Storing in volinfo:key= %s, val=%s", key,
- value->data);
-
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
- return -1;
- }
- return 0;
-}
-
-int
-_storeopts(dict_t *this, char *key, data_t *value, void *data)
+_storeopts(dict_t *dict_value, char *key, data_t *value, void *data)
{
int32_t ret = 0;
int32_t exists = 0;
+ int32_t option_len = 0;
gf_store_handle_t *shandle = NULL;
- xlator_t *xl = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ xlator_t *this = NULL;
- xl = THIS;
- GF_ASSERT(xl);
+ this = THIS;
+ GF_ASSERT(this);
- shandle = (gf_store_handle_t *)data;
+ dict_data = (glusterd_volinfo_data_store_t *)data;
+ shandle = dict_data->shandle;
GF_ASSERT(shandle);
GF_ASSERT(shandle->fd > 0);
- GF_ASSERT(shandle->path);
GF_ASSERT(key);
GF_ASSERT(value);
GF_ASSERT(value->data);
- if (is_key_glusterd_hooks_friendly(key)) {
- exists = 1;
+ if (dict_data->key_check == 1) {
+ if (is_key_glusterd_hooks_friendly(key)) {
+ exists = 1;
- } else {
- exists = glusterd_check_option_exists(key, NULL);
+ } else {
+ exists = glusterd_check_option_exists(key, NULL);
+ }
}
-
- if (1 == exists) {
- gf_msg_debug(xl->name, 0,
- "Storing in volinfo:key= %s, "
+ if (exists == 1 || dict_data->key_check == 0) {
+ gf_msg_debug(this->name, 0,
+ "Storing in buffer for volinfo:key= %s, "
"val=%s",
key, value->data);
-
} else {
- gf_msg_debug(xl->name, 0, "Discarding:key= %s, val=%s", key,
+ gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key,
value->data);
return 0;
}
- ret = gf_store_save_value(shandle->fd, key, (char *)value->data);
- if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store"
- " handle for path: %s",
- shandle->path);
+ /*
+ * The option_len considers the length of the key value
+ * pair and along with that '=' and '\n', but as value->len
+ * already considers a NULL at the end of the data, adding
+ * just 1.
+ */
+ option_len = strlen(key) + value->len + 1;
+
+ if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) {
+ ret = gf_store_save_items(shandle->fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ return -1;
+ }
+ dict_data->buffer_len = 0;
+ dict_data->buffer[0] = '\0';
+ }
+ ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1,
+ "%s=%s\n", key, value->data);
+ if (ret < 0 || ret > option_len + 1) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL);
return -1;
}
+
+ dict_data->buffer_len += ret;
+
return 0;
}
@@ -1013,7 +1000,7 @@ glusterd_store_create_snap_dir(glusterd_snap_t *snap)
return ret;
}
-int32_t
+static int32_t
glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
{
int32_t ret = -1;
@@ -1021,19 +1008,47 @@ glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
GF_ASSERT(volinfo->shandle);
+ xlator_t *this = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
shandle = volinfo->shandle;
+
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+
ret = glusterd_volume_exclude_options_write(fd, volinfo);
- if (ret)
+ if (ret) {
goto out;
+ }
+
+ dict_data->shandle = shandle;
+ dict_data->key_check = 1;
shandle->fd = fd;
- dict_foreach(volinfo->dict, _storeopts, shandle);
+ dict_foreach(volinfo->dict, _storeopts, (void *)dict_data);
+
+ dict_data->key_check = 0;
+ dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data);
+
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
- dict_foreach(volinfo->gsync_slaves, _storeslaves, shandle);
shandle->fd = 0;
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1274,14 +1289,6 @@ out:
return ret;
}
-static int
-_gd_store_rebalance_dict(dict_t *dict, char *key, data_t *value, void *data)
-{
- int fd = *(int *)data;
-
- return gf_store_save_value(fd, key, value->data);
-}
-
int32_t
glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
{
@@ -1289,6 +1296,12 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
char buf[PATH_MAX];
char uuid[UUID_SIZE + 1];
uint total_len = 0;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ gf_store_handle_t shandle;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
GF_ASSERT(fd > 0);
GF_ASSERT(volinfo);
@@ -1328,14 +1341,33 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
}
ret = gf_store_save_items(fd, buf);
- if (ret)
+ if (ret) {
goto out;
+ }
if (volinfo->rebal.dict) {
- dict_foreach(volinfo->rebal.dict, _gd_store_rebalance_dict, &fd);
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = &shandle;
+ shandle.fd = fd;
+ dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ NULL);
+ goto out;
+ ;
+ }
+ }
}
out:
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -2309,7 +2341,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2642,7 +2674,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
brick_count++;
}
- if (gf_store_iter_destroy(tmpiter)) {
+ if (gf_store_iter_destroy(&tmpiter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2823,13 +2855,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(tmpiter)) {
+ if (gf_store_iter_destroy(&tmpiter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
}
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2962,7 +2994,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3238,7 +3270,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3343,20 +3375,6 @@ glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len)
snprintf(path, len, "%s/options", conf->workdir);
}
-int
-_store_global_opts(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_store_handle_t *shandle = data;
-
- if (gf_store_save_value(shandle->fd, key, (char *)value->data)) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL,
- "Unable to write into store handle for key : %s, value %s", key,
- (char *)value->data);
- }
-
- return 0;
-}
-
int32_t
glusterd_store_options(xlator_t *this, dict_t *opts)
{
@@ -3365,13 +3383,15 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
char path[PATH_MAX] = {0};
int fd = -1;
int32_t ret = -1;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
conf = this->private;
glusterd_store_set_options_path(conf, path, sizeof(path));
ret = gf_store_handle_new(path, &shandle);
- if (ret)
+ if (ret) {
goto out;
+ }
fd = gf_store_mkstemp(shandle);
if (fd <= 0) {
@@ -3379,15 +3399,30 @@ glusterd_store_options(xlator_t *this, dict_t *opts)
goto out;
}
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = shandle;
shandle->fd = fd;
- dict_foreach(opts, _store_global_opts, shandle);
- shandle->fd = 0;
+ dict_foreach(opts, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
+
ret = gf_store_rename_tmppath(shandle);
- if (ret)
- goto out;
out:
- if ((ret < 0) && (fd > 0))
+ shandle->fd = 0;
+ GF_FREE(dict_data);
+ if ((ret < 0) && (fd > 0)) {
gf_store_unlink_tmppath(shandle);
+ }
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3433,7 +3468,7 @@ glusterd_store_retrieve_options(xlator_t *this)
goto out;
ret = 0;
out:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
gf_store_handle_destroy(shandle);
return ret;
}
@@ -3883,7 +3918,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -4625,7 +4660,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
is_ok = _gf_true;
next:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
if (!is_ok) {
gf_log(this->name, GF_LOG_WARNING,
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 04070549678..83f4df0783e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -29,7 +29,7 @@ typedef enum glusterd_store_ver_ac_ {
} glusterd_volinfo_ver_ac_t;
#define UUID_SIZE 36
-
+#define VOLINFO_BUFFER_SIZE 4093
#define GLUSTERD_STORE_UUID_KEY "UUID"
#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
@@ -112,6 +112,19 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
+/*
+ * The structure is responsible for handling the parameter for writes into
+ * the buffer before it is finally written to the file. The writes will be
+ * of the form of key-value pairs.
+ */
+struct glusterd_volinfo_data_store_ {
+ gf_store_handle_t *shandle; /*Contains fd and path of the file */
+ int16_t buffer_len;
+ char key_check; /* flag to check if key is to be validated before write*/
+ char buffer[VOLINFO_BUFFER_SIZE];
+};
+typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t;
+
int32_t
glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
glusterd_volinfo_ver_ac_t ac);
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
index 99119d69e45..18b3fb13630 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
@@ -162,6 +162,9 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
char *localtime_logging = NULL;
char *log_level = NULL;
char daemon_log_level[30] = {0};
+ char msg[1024] = {
+ 0,
+ };
int32_t len = 0;
this = THIS;
@@ -187,7 +190,7 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
svc->proc.logdir, svc->name);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -195,9 +198,13 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
goto unlock;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes",
- NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -226,8 +233,8 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
if (cmdline)
dict_foreach(cmdline, svc_add_args, (void *)&runner);
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS,
- "Starting %s service", svc->name);
+ snprintf(msg, sizeof(msg), "Starting %s service", svc->name);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
if (flags == PROC_START_NO_WAIT) {
ret = runner_run_nowait(&runner);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 7d38b0a42d7..90ef2cf4c9c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -79,6 +79,14 @@
#include <sys/sockio.h>
#endif
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <libprocstat.h>
+#include <libutil.h>
+#endif
+
#define NFS_PROGRAM 100003
#define NFSV3_VERSION 3
@@ -1117,7 +1125,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir)
}
brick_dir = &brickpath[strlen(mnt_pt)];
- brick_dir++;
+ if (brick_dir[0] == '/')
+ brick_dir++;
snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir);
}
@@ -2068,8 +2077,8 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo,
retry:
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
- /* Run bricks with valgrind */
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ /* Run bricks with valgrind. */
if (volinfo->logdir) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log",
volinfo->logdir, volinfo->volname, exp_path);
@@ -2083,8 +2092,13 @@ retry:
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -2197,7 +2211,7 @@ retry:
if (is_brick_mx_enabled())
runner_add_arg(&runner, "--brick-mux");
- runner_log(&runner, "", 0, "Starting GlusterFS");
+ runner_log(&runner, "", GF_LOG_DEBUG, "Starting GlusterFS");
brickinfo->port = port;
brickinfo->rdma_port = rdma_port;
@@ -2206,7 +2220,10 @@ retry:
if (wait) {
synclock_unlock(&priv->big_lock);
+ errno = 0;
ret = runner_run(&runner);
+ if (errno != 0)
+ ret = errno;
synclock_lock(&priv->big_lock);
if (ret == EADDRINUSE) {
@@ -2788,6 +2805,15 @@ glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path,
ret = -1;
goto out;
}
+ } else if (priv->op_version < GD_OP_VERSION_7_0) {
+ ret = get_checksum_for_path(filepath, &cksum, priv->op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL,
+ "unable to get "
+ "checksum for path: %s",
+ filepath);
+ goto out;
+ }
}
ret = get_checksum_for_file(fd, &cksum, priv->op_version);
@@ -6083,7 +6109,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
GF_ATOMIC_INC(conf->blockers);
ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
iobref, frame, NULL, 0, NULL, 0, NULL);
- return ret;
free_iobref:
iobref_unref(iobref);
@@ -6092,7 +6117,7 @@ maybe_free_iobuf:
iobuf_unref(iobuf);
}
err:
- return -1;
+ return ret;
}
extern size_t
@@ -6420,7 +6445,6 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo,
int
glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
{
- char fname[128] = "";
char buf[1024] = "";
char cmdline[2048] = "";
xlator_t *this = NULL;
@@ -6435,6 +6459,22 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
this = THIS;
GF_ASSERT(this);
+#ifdef __FreeBSD__
+ blen = sizeof(buf);
+ int mib[4];
+
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_PROC;
+ mib[2] = KERN_PROC_ARGS;
+ mib[3] = pid;
+
+ if (sys_sysctl(mib, 4, buf, &blen, NULL, blen) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running",
+ pid);
+ return ret;
+ }
+#else
+ char fname[128] = "";
snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
if (sys_access(fname, R_OK) != 0) {
@@ -6451,6 +6491,7 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
strerror(errno), fname);
return ret;
}
+#endif
/* convert cmdline to single string */
for (i = 0, j = 0; i < blen; i++) {
@@ -6499,6 +6540,43 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
char *
search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
{
+ char *brick_path = NULL;
+#ifdef __FreeBSD__
+ struct filestat *fst;
+ struct procstat *ps;
+ struct kinfo_proc *kp;
+ struct filestat_list *head;
+
+ ps = procstat_open_sysctl();
+ if (ps == NULL)
+ goto out;
+
+ kp = kinfo_getproc(brick_pid);
+ if (kp == NULL)
+ goto out;
+
+ head = procstat_getfiles(ps, (void *)kp, 0);
+ if (head == NULL)
+ goto out;
+
+ STAILQ_FOREACH(fst, head, next)
+ {
+ if (fst->fs_fd < 0)
+ continue;
+
+ if (!strcmp(fst->fs_path, brickpath)) {
+ brick_path = gf_strdup(fst->fs_path);
+ break;
+ }
+ }
+
+out:
+ if (head != NULL)
+ procstat_freefiles(ps, head);
+ if (kp != NULL)
+ free(kp);
+ procstat_close(ps);
+#else
struct dirent *dp = NULL;
DIR *dirp = NULL;
size_t len = 0;
@@ -6509,7 +6587,6 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
0,
},
};
- char *brick_path = NULL;
if (!brickpath)
goto out;
@@ -6547,6 +6624,7 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath)
out:
if (dirp)
sys_closedir(dirp);
+#endif
return brick_path;
}
@@ -8417,7 +8495,8 @@ glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state,
transitions[next].old_state = old_state;
transitions[next].new_state = new_state;
transitions[next].event = event;
- time(&transitions[next].time);
+ transitions[next].time = gf_time();
+
log->current = next;
if (log->count < log->size)
log->count++;
@@ -14632,7 +14711,8 @@ glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next)
*/
int32_t
glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
- int32_t sub_count)
+ char **volname, char **brick_list,
+ int32_t *brick_count, int32_t sub_count)
{
int ret = -1;
int i = 0;
@@ -14643,12 +14723,9 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
addrinfo_list_t *ai_list_tmp1 = NULL;
addrinfo_list_t *ai_list_tmp2 = NULL;
char *brick = NULL;
- char *brick_list = NULL;
char *brick_list_dup = NULL;
char *brick_list_ptr = NULL;
char *tmpptr = NULL;
- char *volname = NULL;
- int32_t brick_count = 0;
struct addrinfo *ai_info = NULL;
char brick_addr[128] = {
0,
@@ -14676,32 +14753,38 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
ai_list->info = NULL;
CDS_INIT_LIST_HEAD(&ai_list->list);
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Unable to get volume name");
- goto out;
+ if (!(*volname)) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &(*volname));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
}
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could not "
- "retrieve bricks list");
- goto out;
+ if (!(*brick_list)) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &(*brick_list));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could not "
+ "retrieve bricks list");
+ goto out;
+ }
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
- "Bricks check : Could not "
- "retrieve brick count");
- goto out;
+ if (!(*brick_count)) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &(*brick_count));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Bricks check : Could not "
+ "retrieve brick count");
+ goto out;
+ }
}
- brick_list_dup = brick_list_ptr = gf_strdup(brick_list);
+ brick_list_dup = brick_list_ptr = gf_strdup(*brick_list);
/* Resolve hostnames and get addrinfo */
- while (i < brick_count) {
+ while (i < *brick_count) {
++i;
brick = strtok_r(brick_list_dup, " \n", &tmpptr);
brick_list_dup = tmpptr;
@@ -14737,8 +14820,12 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
i = 0;
ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list);
+ if (*brick_count < sub_count) {
+ sub_count = *brick_count;
+ }
+
/* Check for bad brick order */
- while (i < brick_count) {
+ while (i < *brick_count) {
++i;
ai_info = ai_list_tmp1->info;
ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t,
@@ -14901,3 +14988,59 @@ out:
GF_FREE(new_auth_allow_list);
return;
}
+
+int
+glusterd_replace_old_auth_allow_list(char *volname)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char *old_auth_allow_list = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, volname, out);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(volinfo->dict, "old.auth.allow",
+ &old_auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "old auth allow list is not set, no need to replace the list");
+ ret = 0;
+ goto out;
+ }
+
+ dict_del_sizen(volinfo->dict, "auth.allow");
+ ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"),
+ old_auth_allow_list);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to replace auth.allow list");
+ goto out;
+ }
+
+ dict_del_sizen(volinfo->dict, "old.auth.allow");
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfiles");
+ goto out;
+ }
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "failed to store volinfo");
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 05346916968..bf6ac295e26 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -859,6 +859,7 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
int32_t count);
int32_t
glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type,
+ char **volname, char **bricks, int32_t *brick_count,
int32_t sub_count);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 087be916c23..8d6fb5e0fac 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3361,11 +3361,20 @@ volgen_link_bricks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
j);
j++;
}
+
if (!xl) {
ret = -1;
goto out;
}
+ if (strncmp(xl_type, "performance/readdir-ahead",
+ SLEN("performance/readdir-ahead")) == 0) {
+ ret = xlator_set_fixed_option(xl, "performance.readdir-ahead",
+ "on");
+ if (ret)
+ goto out;
+ }
+
ret = volgen_xlator_link(xl, trav);
if (ret)
goto out;
@@ -3593,13 +3602,13 @@ volgen_graph_build_readdir_ahead(volgen_graph_t *graph,
int32_t clusters = 0;
if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD ||
- !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR) ||
- !glusterd_volinfo_get_boolean(volinfo, VKEY_READDIR_AHEAD))
+ !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR))
goto out;
clusters = volgen_link_bricks_from_list_tail(
graph, volinfo, "performance/readdir-ahead", "%s-readdir-ahead-%d",
child_count, 1);
+
out:
return clusters;
}
@@ -3801,6 +3810,38 @@ out:
}
static int
+set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ int clusters)
+{
+ xlator_t *xlator = NULL;
+ int i = 0;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_9_0)
+ return 0;
+ xlator = first_of(graph);
+
+ for (i = 0; i < clusters; i++) {
+ ret = xlator_set_fixed_option(xlator, "volume-id",
+ uuid_utoa(volinfo->volume_id));
+ if (ret)
+ goto out;
+
+ xlator = xlator->next;
+ }
+
+out:
+ return ret;
+}
+
+static int
volgen_graph_build_afr_clusters(volgen_graph_t *graph,
glusterd_volinfo_t *volinfo)
{
@@ -3842,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
clusters = -1;
goto out;
}
+
+ ret = set_volfile_id_option(graph, volinfo, clusters);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
+
if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index cafdffb63c4..814ab14fb27 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1002,7 +1002,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
gf_msg_debug(this->name, 0,
"Replicate cluster type "
"found. Checking brick order.");
- ret = glusterd_check_brick_order(dict, msg, type,
+ ret = glusterd_check_brick_order(dict, msg, type, &volname,
+ &bricks, &brick_count,
replica_count);
} else if (type == GF_CLUSTER_TYPE_DISPERSE) {
ret = dict_get_int32n(dict, "disperse-count",
@@ -1016,7 +1017,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr,
gf_msg_debug(this->name, 0,
"Disperse cluster type"
" found. Checking brick order.");
- ret = glusterd_check_brick_order(dict, msg, type,
+ ret = glusterd_check_brick_order(dict, msg, type, &volname,
+ &bricks, &brick_count,
disperse_count);
}
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 3ac8e2a29d7..398b4d76f52 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1813,7 +1813,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
{.key = "performance.readdir-ahead",
.voltype = "performance/readdir-ahead",
.option = "!perf",
- .value = "on",
+ .value = "off",
.op_version = 3,
.description = "enable/disable readdir-ahead translator in the volume.",
.flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT},
@@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.type = NO_DOC,
},
+ {.key = "cluster.use-anonymous-inode",
+ .voltype = "cluster/replicate",
+ .op_version = GD_OP_VERSION_9_0,
+ .value = "yes",
+ .flags = VOLOPT_FLAG_CLIENT_OPT},
{.key = NULL}};
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 91c5f9ec5e3..7a86c2997b1 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1423,7 +1423,7 @@ init(xlator_t *this)
char *mountbroker_root = NULL;
int i = 0;
int total_transport = 0;
- gf_boolean_t valgrind = _gf_false;
+ gf_valgrind_tool vgtool;
char *valgrind_str = NULL;
char *transport_type = NULL;
char var_run_dir[PATH_MAX] = {
@@ -1436,6 +1436,14 @@ init(xlator_t *this)
int32_t len = 0;
int op_version = 0;
+#if defined(RUN_WITH_MEMCHECK)
+ vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ vgtool = _gf_drd;
+#else
+ vgtool = _gf_none;
+#endif
+
#ifndef GF_DARWIN_HOST_OS
{
struct rlimit lim;
@@ -1925,18 +1933,24 @@ init(xlator_t *this)
}
/* Set option to run bricks on valgrind if enabled in glusterd.vol */
- this->ctx->cmd_args.valgrind = valgrind;
+ this->ctx->cmd_args.vgtool = vgtool;
ret = dict_get_str(this->options, "run-with-valgrind", &valgrind_str);
if (ret < 0) {
gf_msg_debug(this->name, 0, "cannot get run-with-valgrind value");
}
if (valgrind_str) {
- if (gf_string2boolean(valgrind_str, &valgrind)) {
+ gf_boolean_t vg = _gf_false;
+
+ if (!strcmp(valgrind_str, "memcheck"))
+ this->ctx->cmd_args.vgtool = _gf_memcheck;
+ else if (!strcmp(valgrind_str, "drd"))
+ this->ctx->cmd_args.vgtool = _gf_drd;
+ else if (!gf_string2boolean(valgrind_str, &vg))
+ this->ctx->cmd_args.vgtool = (vg ? _gf_memcheck : _gf_none);
+ else
gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
- "run-with-valgrind value not a boolean string");
- } else {
- this->ctx->cmd_args.valgrind = valgrind;
- }
+ "run-with-valgrind is neither boolean"
+ " nor one of 'memcheck' or 'drd'");
}
/* Store ping-timeout in conf */
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 2c8fab8f0e7..cc4f98ecf47 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -1197,6 +1197,8 @@ glusterd_op_set_ganesha(dict_t *dict, char **errstr);
int
ganesha_manage_export(dict_t *dict, char *value,
gf_boolean_t update_cache_invalidation, char **op_errstr);
+int
+gd_ganesha_send_dbus(char *volname, char *value);
gf_boolean_t
glusterd_is_ganesha_cluster();
gf_boolean_t
@@ -1367,4 +1369,7 @@ glusterd_recreate_volfiles(glusterd_conf_t *conf);
void
glusterd_add_peers_to_auth_list(char *volname);
+int
+glusterd_replace_old_auth_allow_list(char *volname);
+
#endif
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index f2eeac1d1ee..0e22fe411ee 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -179,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts)
0,
};
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
fts->sec = ts.tv_sec;
fts->nsec = ts.tv_nsec;
@@ -5899,7 +5899,9 @@ fuse_graph_sync(xlator_t *this)
new_graph_id = priv->next_graph->id;
priv->next_graph = NULL;
need_first_lookup = 1;
- priv->handle_graph_switch = _gf_true;
+ if (old_subvol) {
+ priv->handle_graph_switch = _gf_true;
+ }
while (!priv->event_recvd) {
ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex);
@@ -5935,13 +5937,6 @@ unlock:
if (winds_on_old_subvol == 0) {
xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL);
}
- } else {
- pthread_mutex_lock(&priv->sync_mutex);
- {
- priv->handle_graph_switch = _gf_false;
- pthread_cond_broadcast(&priv->migrate_cond);
- }
- pthread_mutex_unlock(&priv->sync_mutex);
}
return 0;
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index fd11f2ba652..a2b0ad11fe4 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh)
return state;
}
-#define FUSE_MAX_AUX_GROUPS \
- 32 /* We can get only up to 32 aux groups from /proc */
void
frame_fill_groups(call_frame_t *frame)
{
@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame)
char filename[32];
char line[4096];
char *ptr = NULL;
- FILE *fp = NULL;
- int idx = 0;
long int id = 0;
char *saveptr = NULL;
char *endptr = NULL;
@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame)
call_stack_set_groups(frame->root, ngroups, &mygroups);
} else {
+ FILE *fp = NULL;
+
ret = snprintf(filename, sizeof filename, "/proc/%d/status",
frame->root->pid);
- if (ret >= sizeof filename)
+ if (ret >= sizeof filename) {
+ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size");
goto out;
+ }
fp = fopen(filename, "r");
- if (!fp)
+ if (!fp) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename,
+ strerror(errno));
goto out;
+ }
- if (call_stack_alloc_groups(frame->root, ngroups) != 0)
- goto out;
+ for (;;) {
+ gf_boolean_t found_groups = _gf_false;
+ int idx = 0;
- while ((ptr = fgets(line, sizeof line, fp))) {
- if (strncmp(ptr, "Groups:", 7) != 0)
- continue;
+ if (call_stack_alloc_groups(frame->root, ngroups) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to allocate gid buffer");
+ goto out;
+ }
+ while ((ptr = fgets(line, sizeof line, fp))) {
+ if (strncmp(ptr, "Groups:", 7) == 0) {
+ found_groups = _gf_true;
+ break;
+ }
+ }
+ if (!found_groups) {
+ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s",
+ filename);
+ break;
+ }
ptr = line + 8;
for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr;
ptr = strtok_r(NULL, " \t\r\n", &saveptr)) {
errno = 0;
id = strtol(ptr, &endptr, 0);
- if (errno == ERANGE)
- break;
- if (!endptr || *endptr)
+ if (errno == ERANGE || !endptr || *endptr) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s",
+ filename);
break;
- frame->root->groups[idx++] = id;
- if (idx == FUSE_MAX_AUX_GROUPS)
+ }
+ if (idx < call_stack_groups_capacity(frame->root))
+ frame->root->groups[idx] = id;
+ idx++;
+ if (idx == GF_MAX_AUX_GROUPS)
break;
}
-
- frame->root->ngrps = idx;
- break;
+ if (idx > call_stack_groups_capacity(frame->root)) {
+ ngroups = idx;
+ rewind(fp);
+ } else {
+ frame->root->ngrps = idx;
+ break;
+ }
}
+ out:
+ if (fp)
+ fclose(fp);
}
-
-out:
- if (fp)
- fclose(fp);
#elif defined(GF_SOLARIS_HOST_OS)
char filename[32];
char scratch[128];
@@ -245,7 +268,7 @@ out:
fp = fopen(filename, "r");
if (fp != NULL) {
if (fgets(scratch, sizeof scratch, fp) != NULL) {
- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS);
+ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS);
if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
fclose(fp);
return;
diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in
index d43fc97d084..3a5feb606d7 100755
--- a/xlators/mount/fuse/utils/mount_glusterfs.in
+++ b/xlators/mount/fuse/utils/mount_glusterfs.in
@@ -469,6 +469,7 @@ parse_options()
main ()
{
+#if !defined(__FreeBSD__)
## `mount` on OSX specifies options as first argument
echo $1|grep -q -- "-o"
if [ $? -eq 0 ]; then
@@ -478,7 +479,7 @@ main ()
volfile_loc=$1
mount_point=$2
fi
-
+#endif /* __FreeBSD__ */
while getopts "Vo:h" opt; do
case "${opt}" in
o)
@@ -499,6 +500,12 @@ main ()
esac
done
+#ifdef __FreeBSD__
+ shift $((OPTIND - 1))
+ volfile_loc="$1"
+ mount_point="$2"
+#endif /* __FreeBSD__ */
+
[ -r "$volfile_loc" ] || {
# '%' included to support ipv6 link local addresses
server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p');
diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
index 3745188c7a5..7e3bbf16086 100644
--- a/xlators/nfs/server/src/acl3.c
+++ b/xlators/nfs/server/src/acl3.c
@@ -753,8 +753,7 @@ acl3svc_init(xlator_t *nfsx)
goto err;
}
- ret = dict_set_dynstr(options, "transport.socket.listen-port",
- GF_ACL3_PORT);
+ ret = dict_set_str(options, "transport.socket.listen-port", GF_ACL3_PORT);
if (ret == -1)
goto err;
ret = dict_set_str(options, "transport-type", "socket");
diff --git a/xlators/nfs/server/src/auth-cache.c b/xlators/nfs/server/src/auth-cache.c
index 64768646074..ffbf5b6cad6 100644
--- a/xlators/nfs/server/src/auth-cache.c
+++ b/xlators/nfs/server/src/auth-cache.c
@@ -189,7 +189,7 @@ out:
static int
_auth_cache_expired(struct auth_cache *cache, struct auth_cache_entry *entry)
{
- return ((time(NULL) - entry->timestamp) > cache->ttl_sec);
+ return ((gf_time() - entry->timestamp) > cache->ttl_sec);
}
/**
@@ -474,7 +474,7 @@ cache_nfs_fh(struct auth_cache *cache, struct nfs3_fh *fh,
goto out;
}
- entry->timestamp = time(NULL);
+ entry->timestamp = gf_time();
/* Update entry->item if it is pointing to a different export_item */
if (entry->item && entry->item != export_item) {
GF_REF_PUT(entry->item);
diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c
index 0688779eb65..1a2b0f85453 100644
--- a/xlators/nfs/server/src/mount3udp_svc.c
+++ b/xlators/nfs/server/src/mount3udp_svc.c
@@ -216,7 +216,7 @@ mount3udp_thread(void *argv)
GF_ASSERT(nfsx);
- glusterfs_this_set(nfsx);
+ THIS = nfsx;
transp = svcudp_create(RPC_ANYSOCK);
if (transp == NULL) {
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
index 8a58977b53c..897fb42b071 100644
--- a/xlators/nfs/server/src/nfs3-helpers.c
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -1072,7 +1072,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode)
if (sattr->atime.set_it == SET_TO_SERVER_TIME) {
valid |= GF_SET_ATTR_ATIME;
if (buf)
- buf->ia_atime = time(NULL);
+ buf->ia_atime = gf_time();
}
if (sattr->mtime.set_it == SET_TO_CLIENT_TIME) {
@@ -1084,7 +1084,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode)
if (sattr->mtime.set_it == SET_TO_SERVER_TIME) {
valid |= GF_SET_ATTR_MTIME;
if (buf)
- buf->ia_mtime = time(NULL);
+ buf->ia_mtime = gf_time();
}
return valid;
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 7cfd75f9ed1..f9042bc3b3f 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -5651,7 +5651,7 @@ nfs3_init_state(xlator_t *nfsx)
goto free_localpool;
}
- nfs3->serverstart = (uint64_t)time(NULL);
+ nfs3->serverstart = (uint64_t)gf_time();
INIT_LIST_HEAD(&nfs3->fdlru);
LOCK_INIT(&nfs3->fdlrulock);
nfs3->fdcount = 0;
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
index c909e3bc093..577e8543966 100644
--- a/xlators/nfs/server/src/nlm4.c
+++ b/xlators/nfs/server/src/nlm4.c
@@ -1011,7 +1011,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame)
int port = -1;
struct nlm4_notify_args *ncf = NULL;
- glusterfs_this_set(cs->nfsx);
+ GF_ASSERT(cs->nfsx);
+ THIS = cs->nfsx;
rpc_transport_get_peeraddr(cs->trans, NULL, 0, &sock_union.storage,
sizeof(sock_union.storage));
@@ -2714,7 +2715,7 @@ nlm4svc_init(xlator_t *nfsx)
goto err;
}
- (void)gf_thread_create(&thr, NULL, nsm_thread, (void *)NULL, "nfsnsm");
+ (void)gf_thread_create(&thr, NULL, nsm_thread, nfsx, "nfsnsm");
timeout.tv_sec = nlm_grace_period;
timeout.tv_nsec = 0;
diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c
index d18b86ce8db..eaa7b916190 100644
--- a/xlators/nfs/server/src/nlmcbk_svc.c
+++ b/xlators/nfs/server/src/nlmcbk_svc.c
@@ -84,9 +84,14 @@ nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp)
void *
nsm_thread(void *argv)
{
+ xlator_t *nfsx = argv;
register SVCXPRT *transp;
int ret = 0;
+ GF_ASSERT(nfsx);
+
+ THIS = nfsx;
+
ret = pmap_unset(NLMCBK_PROGRAM, NLMCBK_V1);
if (ret == 0) {
gf_msg(GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_PMAP_UNSET_FAIL,
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 2fefaef8de8..9375d29c17f 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -133,23 +133,17 @@ ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
return 0;
}
-int32_t
+static gf_boolean_t
ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {
- 0,
- };
ioc_table_t *table = NULL;
+ GF_ASSERT(ioc_inode);
table = ioc_inode->table;
+ GF_ASSERT(table);
- gettimeofday(&tv, NULL);
-
- if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
- need_revalidate = 1;
-
- return need_revalidate;
+ return (gf_time() - ioc_inode->cache.last_revalidate >=
+ table->cache_timeout);
}
/*
@@ -411,9 +405,6 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ioc_inode_t *ioc_inode = NULL;
size_t destroy_size = 0;
struct iatt *local_stbuf = NULL;
- struct timeval tv = {
- 0,
- };
local = frame->local;
ioc_inode = local->inode;
@@ -451,10 +442,9 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0)
local_stbuf = NULL;
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
}
ioc_inode_unlock(ioc_inode);
@@ -1405,9 +1395,6 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
{
ioc_inode_t *ioc_inode = NULL;
uint64_t tmp_inode = 0;
- struct timeval tv = {
- 0,
- };
inode_ctx_get(fd->inode, this, &tmp_inode);
ioc_inode = (ioc_inode_t *)(long)tmp_inode;
@@ -1418,10 +1405,9 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
return 0;
}
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
}
ioc_inode_unlock(ioc_inode);
@@ -1955,9 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
table = ioc_inode->table;
- if (ioc_inode->cache.tv.tv_sec) {
- gf_time_fmt_tv(timestr, sizeof timestr, &ioc_inode->cache.tv,
- gf_timefmt_FT);
+ if (ioc_inode->cache.last_revalidate) {
+ gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate,
+ gf_timefmt_FT);
gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
}
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 4303c2fae13..14923c75edc 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -117,15 +117,13 @@ struct ioc_page {
struct ioc_cache {
rbthash_table_t *page_table;
struct list_head page_lru;
- time_t mtime; /*
- * seconds component of file mtime
- */
- time_t mtime_nsec; /*
- * nanosecond component of file mtime
- */
- struct timeval tv; /*
- * time-stamp at last re-validate
- */
+ time_t mtime; /*
+ * seconds component of file mtime
+ */
+ time_t mtime_nsec; /*
+ * nanosecond component of file mtime
+ */
+ time_t last_revalidate; /* timestamp at last re-validate */
};
struct ioc_inode {
@@ -270,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size,
pthread_mutex_unlock(&page->page_lock); \
} while (0)
-static inline uint64_t
-time_elapsed(struct timeval *now, struct timeval *then)
-{
- uint64_t sec = now->tv_sec - then->tv_sec;
-
- if (sec)
- return sec;
-
- return 0;
-}
-
ioc_inode_t *
ioc_inode_search(ioc_table_t *table, inode_t *inode);
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index a8edbde23f2..84b1ae6cb20 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -413,9 +413,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ioc_waitq_t *waitq = NULL;
size_t iobref_page_size = 0;
char zero_filled = 0;
- struct timeval tv = {
- 0,
- };
GF_ASSERT(frame);
@@ -431,7 +428,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0));
- gettimeofday(&tv, NULL);
ioc_inode_lock(ioc_inode);
{
if (op_ret == -1 ||
@@ -448,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
}
- memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval));
+ ioc_inode->cache.last_revalidate = gf_time();
if (op_ret < 0) {
/* error, readv returned -1 */
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 6fa4d88389c..3d24cc97f4b 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1016,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */
static void
iot_apply_event(xlator_t *this, threshold_t *thresh)
{
- struct timespec now;
- time_t delta;
+ time_t delta, now = gf_time();
/* Refresh for manual testing/debugging. It's cheap. */
THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1);
- timespec_now(&now);
-
if (thresh->value && thresh->update_time) {
- delta = now.tv_sec - thresh->update_time;
+ delta = now - thresh->update_time;
/* Be careful about underflow. */
if (thresh->value <= delta) {
thresh->value = 0;
@@ -1046,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh)
kill(getpid(), SIGTRAP);
}
- thresh->update_time = now.tv_sec;
+ thresh->update_time = now;
}
static void *
@@ -1311,7 +1308,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
/* Wait for draining stub from queue before notify PARENT_DOWN */
stub_cnt = GF_ATOMIC_GET(conf->stub_cnt);
if (stub_cnt) {
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
pthread_mutex_lock(&conf->mutex);
{
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 1313c2b55c3..a405be51f02 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <glusterfs/timespec.h>
#include <glusterfs/glusterfs.h>
#include <glusterfs/defaults.h>
#include <glusterfs/logging.h>
@@ -33,8 +32,7 @@
struct mdc_statfs_cache {
pthread_mutex_t lock;
- gf_boolean_t initialized;
- struct timespec last_refreshed;
+ time_t last_refreshed; /* (time_t)-1 if not yet initialized. */
struct statvfs buf;
};
@@ -61,7 +59,7 @@ struct mdc_statistics {
};
struct mdc_conf {
- int timeout;
+ uint32_t timeout;
gf_boolean_t cache_posix_acl;
gf_boolean_t cache_glusterfs_acl;
gf_boolean_t cache_selinux;
@@ -376,10 +374,9 @@ unlock:
static gf_boolean_t
__is_cache_valid(xlator_t *this, time_t mdc_time)
{
- time_t now = 0;
gf_boolean_t ret = _gf_true;
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ uint32_t timeout = 0;
time_t last_child_down = 0;
conf = this->private;
@@ -393,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time)
last_child_down = conf->last_child_down;
timeout = conf->timeout;
- time(&now);
-
if ((mdc_time == 0) ||
((last_child_down != 0) && (mdc_time < last_child_down))) {
ret = _gf_false;
goto out;
}
- if (now >= (mdc_time + timeout)) {
+ if (gf_time() >= (mdc_time + timeout)) {
ret = _gf_false;
}
@@ -581,10 +576,9 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
mdc_from_iatt(mdc, iatt);
mdc->valid = _gf_true;
if (update_time) {
- time(&mdc->ia_time);
-
+ mdc->ia_time = gf_time();
if (mdc->xa_time && update_xa_time)
- time(&mdc->xa_time);
+ mdc->xa_time = mdc->ia_time;
}
gf_msg_callingfn(
@@ -785,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict)
if (newdict)
mdc->xattr = newdict;
- time(&mdc->xa_time);
+ mdc->xa_time = gf_time();
gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld",
uuid_utoa(inode->gfid), (long long)mdc->xa_time);
}
@@ -1063,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf)
pthread_mutex_lock(&conf->statfs_cache.lock);
{
memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs));
- clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed);
- conf->statfs_cache.initialized = _gf_true;
+ conf->statfs_cache.last_refreshed = gf_time();
}
pthread_mutex_unlock(&conf->statfs_cache.lock);
}
@@ -1073,8 +1066,7 @@ int
mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
{
struct mdc_conf *conf = this->private;
- struct timespec now;
- double cache_age = 0.0;
+ uint32_t cache_age = 0;
int ret = 0;
if (!buf || !conf) {
@@ -1083,23 +1075,23 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
}
*buf = NULL;
- timespec_now(&now);
pthread_mutex_lock(&conf->statfs_cache.lock);
{
- /* Skip if the cache is not initialized */
- if (!conf->statfs_cache.initialized) {
+ /* Skip if the cache is not initialized. */
+ if (conf->statfs_cache.last_refreshed == (time_t)-1) {
ret = -1;
goto unlock;
}
- cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec);
+ cache_age = (gf_time() - conf->statfs_cache.last_refreshed);
- gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age);
+ gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs",
+ cache_age);
if (cache_age > conf->timeout) {
- /* Expire the cache */
+ /* Expire the cache. */
gf_log(this->name, GF_LOG_DEBUG,
- "Cache age %lf exceeded timeout %d", cache_age,
+ "Cache age %u secs exceeded timeout %u secs", cache_age,
conf->timeout);
ret = -1;
goto unlock;
@@ -3616,7 +3608,7 @@ int
mdc_reconfigure(xlator_t *this, dict_t *options)
{
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ int timeout = 0, ret = 0;
char *tmp_str = NULL;
conf = this->private;
@@ -3656,7 +3648,10 @@ mdc_reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out);
GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out);
- mdc_xattr_list_populate(conf, tmp_str);
+
+ ret = mdc_xattr_list_populate(conf, tmp_str);
+ if (ret < 0)
+ goto out;
/* If timeout is greater than 60s (default before the patch that added
* cache invalidation support was added) then, cache invalidation
@@ -3669,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options)
}
conf->timeout = timeout;
- (void)mdc_register_xattr_inval(this);
+ ret = mdc_register_xattr_inval(this);
out:
- return 0;
+ return ret;
}
int32_t
mdc_mem_acct_init(xlator_t *this)
{
- int ret = -1;
-
- ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
- return ret;
+ return xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
}
int
mdc_init(xlator_t *this)
{
struct mdc_conf *conf = NULL;
- int timeout = 0;
+ uint32_t timeout = 0;
char *tmp_str = NULL;
conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t);
@@ -3699,7 +3691,7 @@ mdc_init(xlator_t *this)
LOCK_INIT(&conf->lock);
- GF_OPTION_INIT("md-cache-timeout", timeout, int32, out);
+ GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out);
GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out);
@@ -3733,7 +3725,9 @@ mdc_init(xlator_t *this)
GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out);
mdc_xattr_list_populate(conf, tmp_str);
- time(&conf->last_child_down);
+ conf->last_child_down = gf_time();
+ conf->statfs_cache.last_refreshed = (time_t)-1;
+
/* initialize gf_atomic_t counters */
GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0);
GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0);
@@ -3764,7 +3758,7 @@ out:
}
void
-mdc_update_child_down_time(xlator_t *this, time_t *now)
+mdc_update_child_down_time(xlator_t *this, time_t now)
{
struct mdc_conf *conf = NULL;
@@ -3772,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&conf->lock);
{
- conf->last_child_down = *now;
+ conf->last_child_down = now;
}
UNLOCK(&conf->lock);
}
@@ -3782,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
struct mdc_conf *conf = NULL;
- time_t now = 0;
conf = this->private;
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
- time(&now);
- mdc_update_child_down_time(this, &now);
+ mdc_update_child_down_time(this, gf_time());
break;
case GF_EVENT_UPCALL:
if (conf->mdc_invalidation)
diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
index 03dedf8ea08..29b99b5b8ea 100644
--- a/xlators/performance/nl-cache/src/nl-cache-helper.c
+++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
@@ -113,7 +113,7 @@ out:
}
void
-nlc_update_child_down_time(xlator_t *this, time_t *now)
+nlc_update_child_down_time(xlator_t *this, time_t now)
{
nlc_conf_t *conf = NULL;
@@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&conf->lock);
{
- conf->last_child_down = *now;
+ conf->last_child_down = now;
}
UNLOCK(&conf->lock);
@@ -262,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
if (nlc_ctx->timer) {
gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer,
conf->cache_timeout);
- time(&nlc_ctx->cache_time);
+ nlc_ctx->cache_time = gf_time();
goto unlock;
}
@@ -496,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
nlc_ctx->timer_data = tmp;
gf_tw_add_timer(conf->timer_wheel, timer);
- time(&nlc_ctx->cache_time);
+ nlc_ctx->cache_time = gf_time();
gf_msg_trace(this->name, 0,
"Registering timer:%p, inode:%p, "
"gfid:%s",
diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
index cd0e1d195fd..33a7c471663 100644
--- a/xlators/performance/nl-cache/src/nl-cache.c
+++ b/xlators/performance/nl-cache/src/nl-cache.c
@@ -520,15 +520,13 @@ int
nlc_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
- time_t now = 0;
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
case GF_EVENT_CHILD_UP:
case GF_EVENT_SOME_DESCENDENT_UP:
- time(&now);
- nlc_update_child_down_time(this, &now);
+ nlc_update_child_down_time(this, gf_time());
/* TODO: nlc_clear_all_cache (this); else
lru prune will lazily clear it*/
break;
@@ -731,7 +729,7 @@ nlc_init(xlator_t *this)
GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0);
INIT_LIST_HEAD(&conf->lru);
- time(&conf->last_child_down);
+ conf->last_child_down = gf_time();
conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
if (!conf->timer_wheel) {
diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h
index 8b09972bb09..85fcc176342 100644
--- a/xlators/performance/nl-cache/src/nl-cache.h
+++ b/xlators/performance/nl-cache/src/nl-cache.h
@@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
loc_t *loc, loc_t *loc2);
void
-nlc_update_child_down_time(xlator_t *this, time_t *now);
+nlc_update_child_down_time(xlator_t *this, time_t now);
void
nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason);
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
index e43fe73bcca..600c3b62ffe 100644
--- a/xlators/performance/open-behind/src/open-behind.c
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -333,6 +333,15 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
return 0;
}
+static void
+ob_open_destroy(call_stub_t *stub, fd_t *fd)
+{
+ stub->frame->local = NULL;
+ STACK_DESTROY(stub->frame->root);
+ call_stub_destroy(stub);
+ fd_unref(fd);
+}
+
static int32_t
ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
call_stub_t *stub)
@@ -355,8 +364,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
if (stub != NULL) {
if (closed) {
- call_stub_destroy(stub);
- fd_unref(fd);
+ ob_open_destroy(stub, fd);
} else {
call_resume(stub);
}
@@ -509,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
}
static int32_t
+ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ ob_inode_t *ob_inode;
+ call_stub_t *stub;
+ fd_t *first_fd;
+ ob_state_t state;
+
+ /* Create requests are never delayed. We always send them synchronously. */
+ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode,
+ &first_fd);
+ if (state == OB_STATE_READY) {
+ /* There's no pending open, but there are other file descriptors opened
+ * so we simply forward the request synchronously. */
+ return default_create(frame, this, loc, flags, mode, umask, fd, xdata);
+ }
+
+ if (state == OB_STATE_OPEN_TRIGGERED) {
+ /* The first open is in progress (either because it was already issued
+ * or because this request triggered it). We try to create a new stub
+ * to retry the operation once the initial open completes. */
+ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd,
+ xdata);
+ if (stub != NULL) {
+ return ob_stub_dispatch(this, ob_inode, first_fd, stub);
+ }
+
+ state = -ENOMEM;
+ }
+
+ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never
+ * be returned by ob_open_and_resume_fd(). If we are here it can only be
+ * because there has been a problem. */
+
+ /* In case of failure we need to decrement the number of open files because
+ * ob_fdclose() won't be called. */
+
+ LOCK(&fd->inode->lock);
+ {
+ ob_inode->open_count--;
+ }
+ UNLOCK(&fd->inode->lock);
+
+ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s",
+ "create", "path=%s", loc->path, NULL);
+
+ return default_create_failure_cbk(frame, -state);
+}
+
+static int32_t
ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
@@ -776,8 +834,7 @@ ob_fdclose(xlator_t *this, fd_t *fd)
UNLOCK(&fd->inode->lock);
if (stub != NULL) {
- call_stub_destroy(stub);
- fd_unref(fd);
+ ob_open_destroy(stub, fd);
}
ob_resume_pending(&list);
@@ -940,6 +997,7 @@ fini(xlator_t *this)
struct xlator_fops fops = {
.open = ob_open,
+ .create = ob_create,
.readv = ob_readv,
.writev = ob_writev,
.flush = ob_flush,
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 640c9ac51c6..7fe4b3c3a4b 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -421,9 +421,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
qr_private_t *priv = NULL;
qr_inode_table_t *table = NULL;
uint32_t rollover = 0;
- struct timeval tv = {
- 0,
- };
rollover = gen >> 32;
gen = gen & 0xffffffff;
@@ -431,7 +428,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
priv = this->private;
table = &priv->table;
- gettimeofday(&tv, NULL);
LOCK(&table->lock);
{
if ((rollover != qr_inode->gen_rollover) ||
@@ -453,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data,
qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec;
qr_inode->buf = *buf;
-
- memcpy(&qr_inode->last_refresh, &tv, sizeof(struct timeval));
+ qr_inode->last_refresh = gf_time();
__qr_inode_register(this, table, qr_inode);
}
@@ -524,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf,
if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) {
qr_inode->buf = *buf;
-
- gettimeofday(&qr_inode->last_refresh, NULL);
-
+ qr_inode->last_refresh = gf_time();
__qr_inode_register(this, table, qr_inode);
} else {
__qr_inode_prune(this, table, qr_inode, gen);
@@ -558,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode)
{
qr_conf_t *conf = NULL;
qr_private_t *priv = NULL;
- struct timeval now;
- struct timeval diff;
priv = this->private;
conf = &priv->conf;
- gettimeofday(&now, NULL);
-
- timersub(&now, &qr_inode->last_refresh, &diff);
-
- if (qr_inode->last_refresh.tv_sec < priv->last_child_down)
+ if (qr_inode->last_refresh < priv->last_child_down)
return _gf_false;
- if (diff.tv_sec >= conf->cache_timeout)
+ if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout)
return _gf_false;
return _gf_true;
@@ -1049,9 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode)
gf_proc_dump_write("entire-file-cached", "%s",
qr_inode->data ? "yes" : "no");
- if (qr_inode->last_refresh.tv_sec) {
- gf_time_fmt_tv(buf, sizeof buf, &qr_inode->last_refresh, gf_timefmt_FT);
-
+ if (qr_inode->last_refresh) {
+ gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT);
gf_proc_dump_write("last-cache-validation-time", "%s", buf);
}
@@ -1404,7 +1390,7 @@ qr_init(xlator_t *this)
ret = 0;
- time(&priv->last_child_down);
+ priv->last_child_down = gf_time();
GF_ATOMIC_INIT(priv->generation, 0);
this->private = priv;
out:
@@ -1454,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf)
}
void
-qr_update_child_down_time(xlator_t *this, time_t *now)
+qr_update_child_down_time(xlator_t *this, time_t now)
{
qr_private_t *priv = NULL;
@@ -1462,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now)
LOCK(&priv->lock);
{
- priv->last_child_down = *now;
+ priv->last_child_down = now;
}
UNLOCK(&priv->lock);
}
@@ -1508,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...)
{
int ret = 0;
qr_private_t *priv = NULL;
- time_t now = 0;
qr_conf_t *conf = NULL;
priv = this->private;
@@ -1517,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...)
switch (event) {
case GF_EVENT_CHILD_DOWN:
case GF_EVENT_SOME_DESCENDENT_DOWN:
- time(&now);
- qr_update_child_down_time(this, &now);
+ qr_update_child_down_time(this, gf_time());
break;
case GF_EVENT_UPCALL:
if (conf->qr_invalidation)
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index 67850821b8e..20fcc70b3a7 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -39,7 +39,7 @@ struct qr_inode {
uint32_t ia_ctime_nsec;
uint32_t gen_rollover;
struct iatt buf;
- struct timeval last_refresh;
+ time_t last_refresh;
struct list_head lru;
uint64_t gen;
uint64_t invalidation_time;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 48f4d0a7f87..721968004a0 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
gf_proc_dump_build_key(key, "server", "total-bytes-write");
gf_proc_dump_write(key, "%" PRIu64, total_write);
+ rpcsvc_statedump(conf->rpc);
+
ret = 0;
out:
if (ret)
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index 609ddea2560..f10722ec3fb 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -140,6 +140,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
struct timespec sleep_till = {
0,
};
+ glusterfs_ctx_t *ctx = this->ctx;
switch (event) {
case GF_EVENT_PARENT_UP: {
@@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_PARENT_DOWN: {
if (!victim->cleanup_starting)
break;
- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
- victim->name);
if (priv->janitor) {
pthread_mutex_lock(&priv->janitor_mutex);
@@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
priv->janitor);
if (!ret) {
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait to set janitor_task flag to _gf_false by
* janitor_task_done */
@@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
(void)pthread_cond_timedwait(&priv->janitor_cond,
&priv->janitor_mutex,
&sleep_till);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
}
}
@@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
GF_FREE(priv->janitor);
}
priv->janitor = NULL;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ while (priv->rel_fdcount > 0) {
+ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
} break;
default:
@@ -1084,7 +1093,13 @@ posix_init(xlator_t *this)
pthread_cond_init(&_private->fsync_cond, NULL);
pthread_mutex_init(&_private->janitor_mutex, NULL);
pthread_cond_init(&_private->janitor_cond, NULL);
+ pthread_cond_init(&_private->fd_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
+ _private->rel_fdcount = 0;
+ ret = posix_spawn_ctx_janitor_thread(this);
+ if (ret)
+ goto out;
+
ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
"posixfsy");
if (ret) {
@@ -1197,6 +1212,8 @@ posix_fini(xlator_t *this)
{
struct posix_private *priv = this->private;
gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
int ret = 0;
int i = 0;
@@ -1243,6 +1260,19 @@ posix_fini(xlator_t *this)
priv->janitor = NULL;
}
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index bb4a5309f45..67db3324083 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1505,7 +1505,7 @@ posix_janitor_task(void *data)
if (!priv)
goto out;
- time(&now);
+ now = gf_time();
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
if (priv->disable_landfill_purge) {
gf_msg_debug(this->name, 0,
@@ -1592,16 +1592,108 @@ unlock:
return;
}
+static struct posix_fd *
+janitor_get_next_fd(glusterfs_ctx_t *ctx)
+{
+ struct posix_fd *pfd = NULL;
+
+ while (list_empty(&ctx->janitor_fds)) {
+ if (ctx->pxl_count == 0) {
+ return NULL;
+ }
+
+ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
+ }
+
+ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
+ list_del_init(&pfd->list);
+
+ return pfd;
+}
+
+static void
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
+{
+ THIS = xl;
+
+ if (pfd->dir == NULL) {
+ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
+ sys_close(pfd->fd);
+ } else {
+ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+ sys_closedir(pfd->dir);
+ }
+
+ GF_FREE(pfd);
+}
+
+static void *
+posix_ctx_janitor_thread_proc(void *data)
+{
+ xlator_t *xl;
+ struct posix_fd *pfd;
+ glusterfs_ctx_t *ctx = NULL;
+ struct posix_private *priv_fd;
+
+ ctx = data;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ xl = pfd->xl;
+ posix_close_pfd(xl, pfd);
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ priv_fd = xl->private;
+ priv_fd->rel_fdcount--;
+ if (!priv_fd->rel_fdcount)
+ pthread_cond_signal(&priv_fd->fd_cond);
+ }
+
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return NULL;
+}
+
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this)
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ if (ctx->pxl_count++ == 0) {
+ ret = gf_thread_create(&ctx->janitor, NULL,
+ posix_ctx_janitor_thread_proc, ctx,
+ "posixctxjan");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning janitor thread failed");
+ ctx->pxl_count--;
+ }
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return ret;
+}
+
static int
-is_fresh_file(int64_t sec, int64_t ns)
+is_fresh_file(struct timespec *ts)
{
- struct timeval tv;
+ struct timespec now;
int64_t elapsed;
- gettimeofday(&tv, NULL);
+ timespec_now_realtime(&now);
+ elapsed = (int64_t)gf_tsdiff(ts, &now);
- elapsed = (tv.tv_sec - sec) * 1000000L;
- elapsed += tv.tv_usec - (ns / 1000L);
if (elapsed < 0) {
/* The file has been modified in the future !!!
* Is it fresh ? previous implementation considered this as a
@@ -1610,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns)
}
/* If the file is newer than a second, we consider it fresh. */
- if (elapsed < 1000000) {
- return 1;
- }
-
- return 0;
+ return elapsed < 1000000;
}
int
@@ -1677,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) {
+ struct timespec ts = {.tv_sec = stbuf.ia_ctime,
+ .tv_nsec = stbuf.ia_ctime_nsec};
+ if (is_fresh_file(&ts)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -1691,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
if (ret != 16) {
/* TODO: This is a very hacky way of doing this, and very prone to
* errors and unexpected behavior. This should be changed. */
- if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) {
+ if (is_fresh_file(&stat.st_ctim)) {
gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE,
"Fresh file: %s", path);
return -ENOENT;
@@ -1950,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path)
goto out;
}
- time_sec = time(NULL);
+ time_sec = gf_time();
gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT);
timelen = strlen(timestamp);
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 762041b5831..6d54d37e5aa 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1361,6 +1361,22 @@ out:
return 0;
}
+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+ glusterfs_ctx_t *ctx = this->ctx;
+ struct posix_private *priv = this->private;
+
+ pfd->xl = this;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ list_add_tail(&pfd->list, &ctx->janitor_fds);
+ priv->rel_fdcount++;
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+}
+
int32_t
posix_releasedir(xlator_t *this, fd_t *fd)
{
@@ -1383,11 +1399,7 @@ posix_releasedir(xlator_t *this, fd_t *fd)
"pfd->dir is NULL for fd=%p", fd);
goto out;
}
-
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_closedir(pfd->dir);
- GF_FREE(pfd);
+ posix_add_fd_to_cleanup(this, pfd);
out:
return 0;
@@ -2510,7 +2522,6 @@ out:
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
- struct posix_private *priv = NULL;
struct posix_fd *pfd = NULL;
int ret = -1;
uint64_t tmp_pfd = 0;
@@ -2518,8 +2529,6 @@ posix_release(xlator_t *this, fd_t *fd)
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);
- priv = this->private;
-
ret = fd_ctx_del(fd, this, &tmp_pfd);
if (ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
@@ -2533,13 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd)
"pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
}
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_close(pfd->fd);
- GF_FREE(pfd);
-
- if (!priv)
- goto out;
+ posix_add_fd_to_cleanup(this, pfd);
out:
return 0;
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 35be197c869..b8db146eef2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -125,7 +125,7 @@ struct posix_fd {
off_t dir_eof; /* offset at dir EOF */
struct list_head list; /* to add to the janitor list */
int odirect;
-
+ xlator_t *xl;
char _pad[4]; /* manual padding */
};
@@ -137,10 +137,6 @@ struct posix_private {
gf_lock_t lock;
char *hostname;
- /* Statistics, provides activity of the server */
-
- struct timeval prev_fetch_time;
- struct timeval init_time;
time_t last_landfill_check;
@@ -170,6 +166,7 @@ struct posix_private {
pthread_cond_t fsync_cond;
pthread_mutex_t janitor_mutex;
pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
int fsync_queue_count;
int32_t janitor_sleep_duration;
@@ -254,8 +251,7 @@ struct posix_private {
gf_boolean_t aio_configured;
gf_boolean_t aio_init_done;
gf_boolean_t aio_capable;
-
- char _pad[4]; /* manual padding */
+ uint32_t rel_fdcount;
};
typedef struct {
@@ -662,6 +658,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
int
posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this);
+
void
posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);