summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorAtin Mukherjee <amukherj@redhat.com>2019-08-19 09:40:48 +0530
committerAtin Mukherjee <amukherj@redhat.com>2019-08-19 09:41:18 +0530
commitbbab770e2aa527059c6590162d944a90efb459c1 (patch)
tree3d6ef95eccf14512645c74bc2b5bfaa50f7d2bd3 /tests
parentc370c70f77079339e2cfb7f284f3a2fb13fd2f97 (diff)
tests: mark bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t as BRICK_MUX_BAD_TEST
Updates: bz#1743069 Change-Id: I1eea0186ca0c1b1226f4b3d0d7c0e41fc7821cbd Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'tests')
-rw-r--r--tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t1
1 files changed, 1 insertions, 0 deletions
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
index f30194b6339..04a85db0c1a 100644
--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -130,3 +130,4 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1}
cleanup;
# run first!
+#G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=1743069
86fbd4f4675f388c79f599b2c'>xlators/cluster/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/Makefile.am32
-rw-r--r--xlators/cluster/afr/src/afr-common.c8914
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c919
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h49
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2669
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h60
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2269
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h54
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3233
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h104
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2620
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h64
-rw-r--r--xlators/cluster/afr/src/afr-messages.h167
-rw-r--r--xlators/cluster/afr/src/afr-open.c870
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c494
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c1090
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h60
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3904
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h73
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1765
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3286
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c1128
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c616
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h389
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1716
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h75
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c3475
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h84
-rw-r--r--xlators/cluster/afr/src/afr.c1933
-rw-r--r--xlators/cluster/afr/src/afr.h1980
-rw-r--r--xlators/cluster/afr/src/pump.c2536
-rw-r--r--xlators/cluster/afr/src/pump.h97
-rw-r--r--xlators/cluster/dht/src/Makefile.am40
-rw-r--r--xlators/cluster/dht/src/dht-common.c14067
-rw-r--r--xlators/cluster/dht/src/dht-common.h1579
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c627
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c147
-rw-r--r--xlators/cluster/dht/src/dht-helper.c2453
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1658
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1404
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1189
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c451
-rw-r--r--xlators/cluster/dht/src/dht-lock.c1392
-rw-r--r--xlators/cluster/dht/src/dht-lock.h91
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h54
-rw-r--r--xlators/cluster/dht/src/dht-messages.h386
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c4702
-rw-r--r--xlators/cluster/dht/src/dht-rename.c2316
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c2845
-rw-r--r--xlators/cluster/dht/src/dht-shared.c1104
-rw-r--r--xlators/cluster/dht/src/dht.c638
-rw-r--r--xlators/cluster/dht/src/nufa.c1183
-rw-r--r--xlators/cluster/dht/src/switch.c1647
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c73
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c127
-rw-r--r--xlators/cluster/ec/Makefile.am (renamed from xlators/cluster/ha/Makefile.am)0
-rw-r--r--xlators/cluster/ec/src/Makefile.am83
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.c109
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-c.c11679
-rw-r--r--xlators/cluster/ec/src/ec-code-c.h27
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.c594
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.h191
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.c101
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.c144
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.h18
-rw-r--r--xlators/cluster/ec/src/ec-code.c1060
-rw-r--r--xlators/cluster/ec/src/ec-code.h44
-rw-r--r--xlators/cluster/ec/src/ec-combine.c995
-rw-r--r--xlators/cluster/ec/src/ec-combine.h44
-rw-r--r--xlators/cluster/ec/src/ec-common.c3042
-rw-r--r--xlators/cluster/ec/src/ec-common.h234
-rw-r--r--xlators/cluster/ec/src/ec-data.c288
-rw-r--r--xlators/cluster/ec/src/ec-data.h35
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c647
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1487
-rw-r--r--xlators/cluster/ec/src/ec-fops.h254
-rw-r--r--xlators/cluster/ec/src/ec-galois.c183
-rw-r--r--xlators/cluster/ec/src/ec-galois.h32
-rw-r--r--xlators/cluster/ec/src/ec-generic.c1591
-rw-r--r--xlators/cluster/ec/src/ec-gf8.c5882
-rw-r--r--xlators/cluster/ec/src/ec-gf8.h18
-rw-r--r--xlators/cluster/ec/src/ec-heal.c3367
-rw-r--r--xlators/cluster/ec/src/ec-heald.c681
-rw-r--r--xlators/cluster/ec/src/ec-heald.h30
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c867
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h200
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c2046
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c2369
-rw-r--r--xlators/cluster/ec/src/ec-locks.c1128
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h30
-rw-r--r--xlators/cluster/ec/src/ec-messages.h61
-rw-r--r--xlators/cluster/ec/src/ec-method.c433
-rw-r--r--xlators/cluster/ec/src/ec-method.h48
-rw-r--r--xlators/cluster/ec/src/ec-types.h690
-rw-r--r--xlators/cluster/ec/src/ec.c1873
-rw-r--r--xlators/cluster/ec/src/ec.h34
-rw-r--r--xlators/cluster/ha/src/Makefile.am15
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c204
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h37
-rw-r--r--xlators/cluster/ha/src/ha.c4023
-rw-r--r--xlators/cluster/ha/src/ha.h63
-rw-r--r--xlators/cluster/map/src/Makefile.am15
-rw-r--r--xlators/cluster/map/src/map-helper.c358
-rw-r--r--xlators/cluster/map/src/map-mem-types.h35
-rw-r--r--xlators/cluster/map/src/map.c2577
-rw-r--r--xlators/cluster/map/src/map.h77
-rw-r--r--xlators/cluster/stripe/src/Makefile.am17
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h40
-rw-r--r--xlators/cluster/stripe/src/stripe.c4296
-rw-r--r--xlators/cluster/stripe/src/stripe.h189
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/Makefile.am2
-rw-r--r--xlators/debug/delay-gen/Makefile.am (renamed from xlators/features/access-control/Makefile.am)0
-rw-r--r--xlators/debug/delay-gen/src/Makefile.am11
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h21
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h26
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c697
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h27
-rw-r--r--xlators/debug/error-gen/src/Makefile.am10
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c3075
-rw-r--r--xlators/debug/error-gen/src/error-gen.h60
-rw-r--r--xlators/debug/io-stats/src/Makefile.am10
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h38
-rw-r--r--xlators/debug/io-stats/src/io-stats.c5761
-rw-r--r--xlators/debug/sink/Makefile.am (renamed from xlators/cluster/stripe/Makefile.am)1
-rw-r--r--xlators/debug/sink/src/Makefile.am14
-rw-r--r--xlators/debug/sink/src/sink.c94
-rw-r--r--xlators/debug/trace/src/Makefile.am9
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h20
-rw-r--r--xlators/debug/trace/src/trace.c4911
-rw-r--r--xlators/debug/trace/src/trace.h55
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c208
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h33
-rw-r--r--xlators/features/Makefile.am13
-rw-r--r--xlators/features/access-control/src/Makefile.am13
-rw-r--r--xlators/features/access-control/src/access-control.c2060
-rw-r--r--xlators/features/access-control/src/access-control.h55
-rw-r--r--xlators/features/arbiter/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)2
-rw-r--r--xlators/features/arbiter/src/Makefile.am19
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h18
-rw-r--r--xlators/features/arbiter/src/arbiter.c380
-rw-r--r--xlators/features/arbiter/src/arbiter.h21
-rw-r--r--xlators/features/barrier/Makefile.am (renamed from xlators/features/filter/Makefile.am)2
-rw-r--r--xlators/features/barrier/src/Makefile.am17
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h20
-rw-r--r--xlators/features/barrier/src/barrier.c809
-rw-r--r--xlators/features/barrier/src/barrier.h89
-rw-r--r--xlators/features/bit-rot/Makefile.am (renamed from xlators/protocol/legacy/transport/ib-verbs/Makefile.am)0
-rw-r--r--xlators/features/bit-rot/src/Makefile.am1
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am23
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h101
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c78
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h50
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c2070
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h46
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.c124
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h38
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c2232
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h302
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am20
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h178
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-object-version.h30
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c796
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h36
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h117
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c3590
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h515
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/encryption/rot-13/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes-multi.c90
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c93
-rw-r--r--xlators/features/changelog/lib/examples/c/get-history.c116
-rwxr-xr-xxlators/features/changelog/lib/examples/python/changes.py34
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py71
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am35
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h74
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c224
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c170
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h255
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c1029
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal.h116
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c413
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c98
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h28
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c652
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c1020
-rw-r--r--xlators/features/changelog/src/Makefile.am29
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c131
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c232
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h50
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c412
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h136
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c1977
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h716
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h34
-rw-r--r--xlators/features/changelog/src/changelog-messages.h172
-rw-r--r--xlators/features/changelog/src/changelog-misc.h131
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c359
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h85
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c440
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h31
-rw-r--r--xlators/features/changelog/src/changelog-rt.c66
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c2989
-rw-r--r--xlators/features/cloudsync/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/Makefile.am46
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h24
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c60
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h134
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py324
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-h.py31
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h22
-rw-r--r--xlators/features/cloudsync/src/cloudsync-messages.h16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am11
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c584
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h50
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c2076
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h123
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am19
-rw-r--r--xlators/features/compress/src/cdc-helper.c527
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c348
-rw-r--r--xlators/features/compress/src/cdc.h99
-rw-r--r--xlators/features/filter/src/Makefile.am15
-rw-r--r--xlators/features/filter/src/filter-mem-types.h30
-rw-r--r--xlators/features/filter/src/filter.c1744
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am16
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h22
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1420
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h107
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am19
-rw-r--r--xlators/features/index/src/index-mem-types.h23
-rw-r--r--xlators/features/index/src/index-messages.h33
-rw-r--r--xlators/features/index/src/index.c2682
-rw-r--r--xlators/features/index/src/index.h86
-rw-r--r--xlators/features/leases/Makefile.am3
-rw-r--r--xlators/features/leases/src/Makefile.am20
-rw-r--r--xlators/features/leases/src/leases-internal.c1412
-rw-r--r--xlators/features/leases/src/leases-mem-types.h27
-rw-r--r--xlators/features/leases/src/leases-messages.h33
-rw-r--r--xlators/features/leases/src/leases.c1168
-rw-r--r--xlators/features/leases/src/leases.h259
-rw-r--r--xlators/features/locks/src/Makefile.am23
-rw-r--r--xlators/features/locks/src/clear.c460
-rw-r--r--xlators/features/locks/src/clear.h73
-rw-r--r--xlators/features/locks/src/common.c1946
-rw-r--r--xlators/features/locks/src/common.h265
-rw-r--r--xlators/features/locks/src/entrylk.c1483
-rw-r--r--xlators/features/locks/src/inodelk.c1480
-rw-r--r--xlators/features/locks/src/locks-mem-types.h44
-rw-r--r--xlators/features/locks/src/locks.h336
-rw-r--r--xlators/features/locks/src/pl-messages.h29
-rw-r--r--xlators/features/locks/src/posix.c5800
-rw-r--r--xlators/features/locks/src/reservelk.c606
-rw-r--r--xlators/features/locks/tests/unit-test.c118
-rw-r--r--xlators/features/mac-compat/Makefile.am3
-rw-r--r--xlators/features/mac-compat/src/Makefile.am13
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c247
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am19
-rw-r--r--xlators/features/marker/src/marker-common.c98
-rw-r--r--xlators/features/marker/src/marker-common.h34
-rw-r--r--xlators/features/marker/src/marker-mem-types.h43
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c554
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h106
-rw-r--r--xlators/features/marker/src/marker-quota.c3406
-rw-r--r--xlators/features/marker/src/marker-quota.h243
-rw-r--r--xlators/features/marker/src/marker.c4046
-rw-r--r--xlators/features/marker/src/marker.h190
-rw-r--r--xlators/features/marker/utils/Makefile.am7
-rwxr-xr-xxlators/features/marker/utils/gsyncd.in7
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am5
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py145
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py14
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py333
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py366
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py54
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py162
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py495
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py50
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/Makefile.am3
-rw-r--r--xlators/features/namespace/src/Makefile.am17
-rw-r--r--xlators/features/namespace/src/namespace.c1344
-rw-r--r--xlators/features/namespace/src/namespace.h23
-rw-r--r--xlators/features/path-convertor/Makefile.am3
-rw-r--r--xlators/features/path-convertor/src/Makefile.am14
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h32
-rw-r--r--xlators/features/path-convertor/src/path.c1239
-rw-r--r--xlators/features/quiesce/src/Makefile.am10
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h27
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h28
-rw-r--r--xlators/features/quiesce/src/quiesce.c3532
-rw-r--r--xlators/features/quiesce/src/quiesce.h89
-rw-r--r--xlators/features/quota/src/Makefile.am29
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c503
-rw-r--r--xlators/features/quota/src/quota-mem-types.h46
-rw-r--r--xlators/features/quota/src/quota-messages.h39
-rw-r--r--xlators/features/quota/src/quota.c6481
-rw-r--r--xlators/features/quota/src/quota.h393
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c494
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h38
-rw-r--r--xlators/features/quota/src/quotad-helpers.c107
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c245
-rw-r--r--xlators/features/read-only/src/Makefile.am20
-rw-r--r--xlators/features/read-only/src/read-only-common.c406
-rw-r--r--xlators/features/read-only/src/read-only-common.h121
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h20
-rw-r--r--xlators/features/read-only/src/read-only.c374
-rw-r--r--xlators/features/read-only/src/read-only.h37
-rw-r--r--xlators/features/read-only/src/worm-helper.c395
-rw-r--r--xlators/features/read-only/src/worm-helper.h44
-rw-r--r--xlators/features/read-only/src/worm.c722
-rw-r--r--xlators/features/sdfs/Makefile.am3
-rw-r--r--xlators/features/sdfs/src/Makefile.am19
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h67
-rw-r--r--xlators/features/sdfs/src/sdfs.c1479
-rw-r--r--xlators/features/sdfs/src/sdfs.h49
-rw-r--r--xlators/features/selinux/Makefile.am3
-rw-r--r--xlators/features/selinux/src/Makefile.am20
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h19
-rw-r--r--xlators/features/selinux/src/selinux-messages.h30
-rw-r--r--xlators/features/selinux/src/selinux.c323
-rw-r--r--xlators/features/selinux/src/selinux.h24
-rw-r--r--xlators/features/shard/Makefile.am3
-rw-r--r--xlators/features/shard/src/Makefile.am17
-rw-r--r--xlators/features/shard/src/shard-mem-types.h24
-rw-r--r--xlators/features/shard/src/shard-messages.h39
-rw-r--r--xlators/features/shard/src/shard.c7382
-rw-r--r--xlators/features/shard/src/shard.h348
-rw-r--r--xlators/features/snapview-client/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/snapview-client/src/Makefile.am16
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h24
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h71
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c2791
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h101
-rw-r--r--xlators/features/snapview-server/Makefile.am1
-rw-r--r--xlators/features/snapview-server/src/Makefile.am25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c715
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h54
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c524
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c2720
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h255
-rw-r--r--xlators/features/thin-arbiter/Makefile.am3
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am22
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h19
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h28
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c661
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h59
-rw-r--r--xlators/features/trash/src/Makefile.am12
-rw-r--r--xlators/features/trash/src/trash-mem-types.h33
-rw-r--r--xlators/features/trash/src/trash.c3761
-rw-r--r--xlators/features/trash/src/trash.h128
-rw-r--r--xlators/features/upcall/Makefile.am3
-rw-r--r--xlators/features/upcall/src/Makefile.am23
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h18
-rw-r--r--xlators/features/upcall/src/upcall-internal.c689
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h23
-rw-r--r--xlators/features/upcall/src/upcall-messages.h29
-rw-r--r--xlators/features/upcall/src/upcall.c2505
-rw-r--r--xlators/features/upcall/src/upcall.h131
-rw-r--r--xlators/features/utime/Makefile.am3
-rw-r--r--xlators/features/utime/src/Makefile.am41
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c28
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h22
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py147
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py35
-rw-r--r--xlators/features/utime/src/utime-helpers.c110
-rw-r--r--xlators/features/utime/src/utime-helpers.h25
-rw-r--r--xlators/features/utime/src/utime-mem-types.h21
-rw-r--r--xlators/features/utime/src/utime-messages.h29
-rw-r--r--xlators/features/utime/src/utime.c392
-rw-r--r--xlators/features/utime/src/utime.h23
-rw-r--r--xlators/lib/src/libxlator.c779
-rw-r--r--xlators/lib/src/libxlator.h188
-rw-r--r--xlators/meta/Makefile.am2
-rw-r--r--xlators/meta/src/Makefile.am48
-rw-r--r--xlators/meta/src/active-link.c34
-rw-r--r--xlators/meta/src/cmdline-file.c39
-rw-r--r--xlators/meta/src/frames-file.c107
-rw-r--r--xlators/meta/src/graph-dir.c98
-rw-r--r--xlators/meta/src/graphs-dir.c67
-rw-r--r--xlators/meta/src/history-file.c44
-rw-r--r--xlators/meta/src/logfile-link.c34
-rw-r--r--xlators/meta/src/logging-dir.c44
-rw-r--r--xlators/meta/src/loglevel-file.c50
-rw-r--r--xlators/meta/src/mallinfo-file.c36
-rw-r--r--xlators/meta/src/measure-file.c49
-rw-r--r--xlators/meta/src/meminfo-file.c44
-rw-r--r--xlators/meta/src/meta-defaults.c655
-rw-r--r--xlators/meta/src/meta-helpers.c332
-rw-r--r--xlators/meta/src/meta-hooks.h48
-rw-r--r--xlators/meta/src/meta-mem-types.h36
-rw-r--r--xlators/meta/src/meta.c1355
-rw-r--r--xlators/meta/src/meta.h162
-rw-r--r--xlators/meta/src/misc.c67
-rw-r--r--xlators/meta/src/misc.h31
-rw-r--r--xlators/meta/src/name-file.c44
-rw-r--r--xlators/meta/src/option-file.c45
-rw-r--r--xlators/meta/src/options-dir.c65
-rw-r--r--xlators/meta/src/private-file.c44
-rw-r--r--xlators/meta/src/process_uuid-file.c37
-rw-r--r--xlators/meta/src/profile-file.c44
-rw-r--r--xlators/meta/src/root-dir.c77
-rw-r--r--xlators/meta/src/subvolume-link.c56
-rw-r--r--xlators/meta/src/subvolumes-dir.c62
-rw-r--r--xlators/meta/src/top-link.c40
-rw-r--r--xlators/meta/src/tree.c179
-rw-r--r--xlators/meta/src/tree.h35
-rw-r--r--xlators/meta/src/type-file.c44
-rw-r--r--xlators/meta/src/version-file.c37
-rw-r--r--xlators/meta/src/view-dir.c33
-rw-r--r--xlators/meta/src/view.c258
-rw-r--r--xlators/meta/src/view.h32
-rw-r--r--xlators/meta/src/volfile-file.c79
-rw-r--r--xlators/meta/src/xlator-dir.c97
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am86
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c206
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.h40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c822
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c2796
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c191
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h53
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c6782
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c235
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c9054
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c2769
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c641
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h88
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c870
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c290
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h104
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h451
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c1144
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c3114
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c721
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c228
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h27
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c13922
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h394
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c1058
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c903
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h83
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c152
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h44
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c2259
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.h31
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1908
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c716
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c3739
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c486
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c796
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c2185
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h270
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c4290
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h169
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c10087
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h18
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c6010
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h258
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c1047
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h72
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c536
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h112
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c2043
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h93
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c16349
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c7668
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h349
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c3033
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c3146
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c2540
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1479
-rw-r--r--xlators/mount/fuse/src/Makefile.am28
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c9033
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h712
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c836
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h42
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c1113
-rw-r--r--xlators/mount/fuse/utils/Makefile.am9
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in898
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in625
-rw-r--r--xlators/nfs/lib/src/auth-null.c72
-rw-r--r--xlators/nfs/lib/src/auth-unix.c97
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.c554
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.h186
-rw-r--r--xlators/nfs/lib/src/rpc-socket.c361
-rw-r--r--xlators/nfs/lib/src/rpc-socket.h65
-rw-r--r--xlators/nfs/lib/src/rpcsvc-auth.c400
-rw-r--r--xlators/nfs/lib/src/rpcsvc.c2923
-rw-r--r--xlators/nfs/lib/src/rpcsvc.h728
-rw-r--r--xlators/nfs/lib/src/xdr-common.h48
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.c1897
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.h1206
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.c229
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.h82
-rw-r--r--xlators/nfs/server/src/Makefile.am41
-rw-r--r--xlators/nfs/server/src/acl3.c933
-rw-r--r--xlators/nfs/server/src/acl3.h42
-rw-r--r--xlators/nfs/server/src/auth-cache.c496
-rw-r--r--xlators/nfs/server/src/auth-cache.h52
-rw-r--r--xlators/nfs/server/src/exports.c1484
-rw-r--r--xlators/nfs/server/src/exports.h93
-rw-r--r--xlators/nfs/server/src/mount3-auth.c642
-rw-r--r--xlators/nfs/server/src/mount3-auth.h59
-rw-r--r--xlators/nfs/server/src/mount3.c5121
-rw-r--r--xlators/nfs/server/src/mount3.h215
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c238
-rw-r--r--xlators/nfs/server/src/netgroups.c1161
-rw-r--r--xlators/nfs/server/src/netgroups.h53
-rw-r--r--xlators/nfs/server/src/nfs-common.c637
-rw-r--r--xlators/nfs/server/src/nfs-common.h79
-rw-r--r--xlators/nfs/server/src/nfs-fops.c2198
-rw-r--r--xlators/nfs/server/src/nfs-fops.h318
-rw-r--r--xlators/nfs/server/src/nfs-generics.c355
-rw-r--r--xlators/nfs/server/src/nfs-generics.h171
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c790
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h84
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h73
-rw-r--r--xlators/nfs/server/src/nfs-messages.h102
-rw-r--r--xlators/nfs/server/src/nfs.c2658
-rw-r--r--xlators/nfs/server/src/nfs.h173
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c351
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h139
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c5526
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h304
-rw-r--r--xlators/nfs/server/src/nfs3.c8866
-rw-r--r--xlators/nfs/server/src/nfs3.h387
-rw-r--r--xlators/nfs/server/src/nfsserver.sym12
-rw-r--r--xlators/nfs/server/src/nlm4.c2786
-rw-r--r--xlators/nfs/server/src/nlm4.h111
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c134
-rw-r--r--xlators/performance/Makefile.am3
-rw-r--r--xlators/performance/io-cache/src/Makefile.am11
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h69
-rw-r--r--xlators/performance/io-cache/src/io-cache.c3075
-rw-r--r--xlators/performance/io-cache/src/io-cache.h428
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c303
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h44
-rw-r--r--xlators/performance/io-cache/src/page.c1450
-rw-r--r--xlators/performance/io-threads/src/Makefile.am10
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h41
-rw-r--r--xlators/performance/io-threads/src/io-threads.c3086
-rw-r--r--xlators/performance/io-threads/src/io-threads.h122
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h30
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am29
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h23
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h29
-rw-r--r--xlators/performance/md-cache/src/md-cache.c4020
-rw-r--r--xlators/performance/nl-cache/Makefile.am3
-rw-r--r--xlators/performance/nl-cache/src/Makefile.am12
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c1201
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h27
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h29
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c840
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h175
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am16
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h22
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h32
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1101
-rw-r--r--xlators/performance/quick-read/src/Makefile.am10
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h34
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h31
-rw-r--r--xlators/performance/quick-read/src/quick-read.c4392
-rw-r--r--xlators/performance/quick-read/src/quick-read.h135
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am10
-rw-r--r--xlators/performance/read-ahead/src/page.c844
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h37
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h31
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1796
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h200
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am18
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h30
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c1382
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h98
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4114
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am12
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c407
-rw-r--r--xlators/performance/write-behind/src/Makefile.am10
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h37
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h31
-rw-r--r--xlators/performance/write-behind/src/write-behind.c5205
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from xlators/cluster/map/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)8
-rw-r--r--xlators/playground/rot-13/src/rot-13.c166
-rw-r--r--xlators/playground/rot-13/src/rot-13.h18
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am17
-rw-r--r--xlators/playground/template/src/template.c186
-rw-r--r--xlators/playground/template/src/template.h43
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c486
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am10
-rw-r--r--xlators/protocol/auth/login/src/login.c302
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c335
-rw-r--r--xlators/protocol/client/src/client-common.c3589
-rw-r--r--xlators/protocol/client/src/client-common.h630
-rw-r--r--xlators/protocol/client/src/client-handshake.c2356
-rw-r--r--xlators/protocol/client/src/client-helpers.c964
-rw-r--r--xlators/protocol/client/src/client-lk.c1134
-rw-r--r--xlators/protocol/client/src/client-mem-types.h38
-rw-r--r--xlators/protocol/client/src/client-messages.h174
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6079
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c6177
-rw-r--r--xlators/protocol/client/src/client.c4449
-rw-r--r--xlators/protocol/client/src/client.h503
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c5442
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/Makefile.am3
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/Makefile.am3
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am31
-rw-r--r--xlators/protocol/server/src/authenticate.c377
-rw-r--r--xlators/protocol/server/src/authenticate.h58
-rw-r--r--xlators/protocol/server/src/server-common.c842
-rw-r--r--xlators/protocol/server/src/server-common.h199
-rw-r--r--xlators/protocol/server/src/server-handshake.c1313
-rw-r--r--xlators/protocol/server/src/server-helpers.c2488
-rw-r--r--xlators/protocol/server/src/server-helpers.h131
-rw-r--r--xlators/protocol/server/src/server-mem-types.h41
-rw-r--r--xlators/protocol/server/src/server-messages.h168
-rw-r--r--xlators/protocol/server/src/server-resolve.c1032
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6084
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c6031
-rw-r--r--xlators/protocol/server/src/server.c2336
-rw-r--r--xlators/protocol/server/src/server.h329
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5197
-rw-r--r--xlators/storage/Makefile.am2
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am25
-rw-r--r--xlators/storage/posix/src/posix-aio.c556
-rw-r--r--xlators/storage/posix/src/posix-aio.h34
-rw-r--r--xlators/storage/posix/src/posix-common.c1524
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c2496
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c243
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h28
-rw-r--r--xlators/storage/posix/src/posix-handle.c1020
-rw-r--r--xlators/storage/posix/src/posix-handle.h221
-rw-r--r--xlators/storage/posix/src/posix-helpers.c3666
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h118
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h39
-rw-r--r--xlators/storage/posix/src/posix-messages.h74
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c916
-rw-r--r--xlators/storage/posix/src/posix-metadata.h71
-rw-r--r--xlators/storage/posix/src/posix.c4688
-rw-r--r--xlators/storage/posix/src/posix.h702
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c173
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h29
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2246
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h35
-rw-r--r--xlators/xlator.sym1
790 files changed, 421103 insertions, 189615 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 4c94f5e44c1..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,13 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt
+if BUILD_GNFS
+ GNFS_DIR = nfs
+endif
+
+DIST_SUBDIRS = cluster storage protocol performance debug features \
+ mount nfs mgmt system playground meta
+
+SUBDIRS = cluster storage protocol performance debug features \
+ mount ${GNFS_DIR} mgmt system playground meta
+
+EXTRA_DIST = xlator.sym
CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f7766580257..00000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c667..00000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb131011..00000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c81652e..00000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c72..00000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index 3310a211513..00000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c856a..00000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 0990822a7d3..8e067d5ab58 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht
+SUBDIRS = afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index e192b599bf4..610819b28fc 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,27 +1,35 @@
-xlator_LTLIBRARIES = afr.la pump.la
+xlator_LTLIBRARIES = afr.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = $(afr_common_source) afr.c
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
+
+afr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
-pump_la_SOURCES = $(afr_common_source) pump.c
-pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h afr-messages.h
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 7e54bfe5bd1..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -24,28 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -53,2504 +36,7843 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "pump.h"
-
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#include "afr-self-heald.h"
+#include "afr-messages.h"
int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+afr_quorum_errno(afr_private_t *priv)
{
- int ret = 0;
-
- GF_ASSERT (gfid);
+ return ENOTCONN;
+}
- ret = dict_set_static_bin (dict, "gfid-req", gfid, 16);
- if (ret)
- gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed");
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
- return ret;
+ return _gf_false;
}
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
{
- int ret = 0;
+ int i = 0;
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
+ } else {
+ replies[i] = 0;
+ }
+ }
+}
- VALIDATE_OR_GOTO (inode, out);
+int
+afr_fav_child_reset_sink_xattrs(void *opaque);
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+int
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
- if (ret < 0)
- goto unlock;
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this);
- split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
}
-unlock:
- UNLOCK (&inode->lock);
+ }
-out:
- return split_brain;
+ return 0;
+
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
+
+ return 0;
}
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+afr_dom_lock_release(call_frame_t *frame)
{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
- if (ret < 0) {
- ctx = 0;
- }
+ return;
+}
- if (set) {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- } else {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
- }
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
+}
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
}
- UNLOCK (&inode->lock);
+ }
+ return ret;
+}
+
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
out:
- return;
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
}
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- int ret = 0;
- uint64_t ctx = 0;
- uint64_t opendir_done = 0;
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- VALIDATE_OR_GOTO (inode, out);
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+out:
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
- if (ret < 0)
- goto unlock;
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
+ }
+ }
+ UNLOCK(&fd->lock);
+}
+
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
+{
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+}
- opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+ "Ignoring lock heal for %s since lk-onwers mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+ current_event_gen = alloca(priv->child_count);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
}
-unlock:
- UNLOCK (&inode->lock);
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
+}
-out:
- return opendir_done;
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
}
+static int
+afr_lock_heal(void *opaque)
+{
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+ "Aborting processing of lk_healq."
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
+
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ if (priv->shd.iamshd)
+ return 0;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
- if (ret < 0) {
- ctx = 0;
- }
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
- ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
- }
- UNLOCK (&inode->lock);
-out:
- return;
+ return ret;
}
-
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
{
- int ret = 0;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- uint64_t ctx = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (inode, out);
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+gf_boolean_t
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno)
+{
+ if (priv->consistent_io && local->call_count != priv->child_count) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are not up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ return _gf_false;
+ }
+ return _gf_true;
+}
- if (ret < 0)
- goto unlock;
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
+{
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
- read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
- }
-unlock:
- UNLOCK (&inode->lock);
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
-out:
- return read_child;
+ return _gf_false;
}
+call_frame_t *
+afr_copy_frame(call_frame_t *base)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
+
+ frame = copy_frame(base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY(frame);
+ return NULL;
+ }
+
+ return frame;
+}
-void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+/* Check if an entry or inode could be undergoing a transaction. */
+gf_boolean_t
+afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
+ xlator_t *this)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
+ GF_UNUSED char *key = NULL;
+ int keylen = 0;
+
+ priv = this->private;
+
+ if (type == AFR_ENTRY_TRANSACTION) {
+ key = GLUSTERFS_PARENT_ENTRYLK;
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
+ /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
+ * pl_inodelk_xattr_fill supports separate keys for different
+ * domains.*/
+ key = GLUSTERFS_INODELK_COUNT;
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
- VALIDATE_OR_GOTO (inode, out);
+static void
+afr_inode_ctx_destroy(afr_inode_ctx_t *ctx)
+{
+ int i = 0;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ if (!ctx)
+ return;
- if (ret < 0) {
- ctx = 0;
- }
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ GF_FREE(ctx->pre_op_done[i]);
+ }
- ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
- | (AFR_ICTX_READ_CHILD_MASK & read_child);
+ GF_FREE(ctx);
+}
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
+int
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
+{
+ uint64_t ctx_int = 0;
+ int ret = -1;
+ int i = -1;
+ int num_locks = -1;
+ afr_inode_ctx_t *ictx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_private_t *priv = this->private;
+
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ret == 0) {
+ *ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ return 0;
+ }
+
+ ictx = GF_CALLOC(1, sizeof(afr_inode_ctx_t), gf_afr_mt_inode_ctx_t);
+ if (!ictx)
+ goto out;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ ictx->pre_op_done[i] = GF_CALLOC(sizeof *ictx->pre_op_done[i],
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!ictx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
}
- UNLOCK (&inode->lock);
-
+ }
+
+ num_locks = sizeof(ictx->lock) / sizeof(afr_lock_t);
+ for (i = 0; i < num_locks; i++) {
+ lock = &ictx->lock[i];
+ INIT_LIST_HEAD(&lock->post_op);
+ INIT_LIST_HEAD(&lock->frozen);
+ INIT_LIST_HEAD(&lock->waiting);
+ INIT_LIST_HEAD(&lock->owners);
+ }
+
+ ctx_int = (uint64_t)(uintptr_t)ictx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ if (ret) {
+ goto out;
+ }
+
+ ictx->spb_choice = -1;
+ ictx->read_subvol = 0;
+ ictx->write_subvol = 0;
+ ictx->lock_count = 0;
+ ret = 0;
+ *ctx = ictx;
out:
- return;
+ if (ret) {
+ afr_inode_ctx_destroy(ictx);
+ }
+ return ret;
}
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e priv->event_generation)
+ * when DATA and METADATA was last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
*/
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+int
+__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ inode_t *inode)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ int i = 0;
+ int txn_type = 0;
+ int count = 0;
+ int index = -1;
+ uint16_t datamap_old = 0;
+ uint16_t metadatamap_old = 0;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint16_t tmp_map = 0;
+ uint16_t mask = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ txn_type = local->transaction.type;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ val = local->inode_ctx->write_subvol;
+ else
+ val = local->inode_ctx->read_subvol;
+
+ metadatamap_old = metadatamap = (val & 0x000000000000ffff);
+ datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ tmp_map = datamap;
+ else if (txn_type == AFR_METADATA_TRANSACTION)
+ tmp_map = metadatamap;
+
+ count = gf_bits_count(tmp_map);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+
+ mask = 1 << i;
+ if (txn_type == AFR_METADATA_TRANSACTION)
+ metadatamap &= ~mask;
+ else if (txn_type == AFR_DATA_TRANSACTION)
+ datamap &= ~mask;
+ }
+
+ switch (txn_type) {
+ case AFR_METADATA_TRANSACTION:
+ if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+ case AFR_DATA_TRANSACTION:
+ if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+ __afr_inode_need_refresh_set(inode, this);
+ break;
+ default:
+ break;
+ }
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->buf)
- GF_FREE (sh->buf);
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- GF_FREE (sh->xattr);
- }
+ if (txn_type == AFR_DATA_TRANSACTION)
+ local->inode_ctx->write_subvol = val;
+ local->inode_ctx->read_subvol = val;
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ return 0;
+}
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+ i_errno = local->replies[i].op_errno;
+
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
}
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
}
+ }
- if (sh->sources)
- GF_FREE (sh->sources);
+ return matching_errors;
+}
+
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame, inode_t *inode)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- if (sh->success)
- GF_FREE (sh->success);
+ priv = this->private;
+ local = frame->local;
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ /* If this transaction saw no failures, then exit. */
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == 0)
+ return 0;
- if (sh->healing_fd && !sh->healing_fd_opened) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
+ if (afr_is_symmetric_error(frame, this))
+ return 0;
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_set_in_flight_sb_status(this, local, inode);
+ }
+ UNLOCK(&inode->lock);
- loc_wipe (&sh->parent_loc);
+ return ret;
}
-
-void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+int
+__afr_inode_read_subvol_get_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- priv = this->private;
+ val = ctx->read_subvol;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
- GF_FREE (local->pending);
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ if (event_p)
+ *event_p = event;
+ return ret;
+}
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+int
+__afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
+{
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ priv = this->private;
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
+ ctx->read_subvol = val;
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
+ ret = 0;
+out:
+ return ret;
}
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
+int
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- if (!local)
- return;
+ priv = this->private;
- afr_local_sh_cleanup (local, this);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small(inode, this, data, metadata,
+ event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
- afr_local_transaction_cleanup (local, this);
+ return ret;
+}
- priv = this->private;
+int
+__afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
-
- GF_FREE (local->child_up);
-
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lookup.xattrs[i]) {
- dict_unref (local->cont.lookup.xattrs[i]);
- local->cont.lookup.xattrs[i] = NULL;
- }
- }
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+ *spb_choice = ctx->spb_choice;
+ return 0;
+}
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
+int
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
- }
+ priv = this->private;
- { /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
- }
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small(inode, this, data, metadata,
+ event);
+ else
+ ret = -1;
- { /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
- }
+ return ret;
+}
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- if (local->cont.create.params)
- dict_unref (local->cont.create.params);
- }
+int
+__afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- { /* mknod */
- if (local->cont.mknod.params)
- dict_unref (local->cont.mknod.params);
- }
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- { /* mkdir */
- if (local->cont.mkdir.params)
- dict_unref (local->cont.mkdir.params);
- }
+ ctx->spb_choice = spb_choice;
- { /* symlink */
- if (local->cont.symlink.params)
- dict_unref (local->cont.symlink.params);
- }
+ ret = 0;
+out:
+ return ret;
+}
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- }
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
+{
+ int ret = -1;
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get(inode, this, data, metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ unsigned char *data = alloca0(priv->child_count);
+ unsigned char *metadata = alloca0(priv->child_count);
+ int data_count = 0;
+ int metadata_count = 0;
+ int event_generation = 0;
+ int ret = 0;
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ return -EIO;
+
+ data_count = AFR_COUNT(data, priv->child_count);
+ metadata_count = AFR_COUNT(metadata, priv->child_count);
+
+ if (inode->ia_type == IA_IFDIR) {
+ /* For directories, allow even if it is in data split-brain. */
+ if (type == AFR_METADATA_TRANSACTION || local->op == GF_FOP_STAT ||
+ local->op == GF_FOP_FSTAT) {
+ if (!metadata_count)
+ return -EIO;
}
-
- { /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ } else {
+ /* For files, abort in case of data/metadata split-brain. */
+ if (!data_count || !metadata_count) {
+ return -EIO;
}
+ }
+
+ if (type == AFR_METADATA_TRANSACTION && readable)
+ memcpy(readable, metadata, priv->child_count * sizeof *metadata);
+ if (type == AFR_DATA_TRANSACTION && readable) {
+ if (!data_count)
+ memcpy(readable, local->child_up,
+ priv->child_count * sizeof *readable);
+ else
+ memcpy(readable, data, priv->child_count * sizeof *data);
+ }
+ if (event_p)
+ *event_p = event_generation;
+ return 0;
}
+static int
+afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_get(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
int
-afr_frame_return (call_frame_t *frame)
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
}
- UNLOCK (&frame->lock);
+ }
- return call_count;
+out:
+ return ret;
}
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
-/**
- * up_children_count - return the number of children that are up
- */
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set(inode, this, data, metadata, event);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
int
-afr_up_children_count (int child_count, unsigned char *child_up)
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice)
{
- int i = 0;
- int ret = 0;
+ int ret = -1;
- for (i = 0; i < child_count; i++)
- if (child_up[i])
- ret++;
- return ret;
-}
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_set(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
+/* The caller of this should perform afr_inode_refresh, if this function
+ * returns _gf_true
+ */
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2)
{
- ino64_t scaled_ino = -1;
+ gf_boolean_t need_refresh = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- scaled_ino = (ino * child_count) + child_index;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto unlock;
+
+ need_refresh = ctx->need_refresh;
+ /* Hoping that the caller will do inode_refresh followed by
+ * this, hence setting the need_refresh to false */
+ ctx->need_refresh = _gf_false;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ if (event_gen1 != event_gen2)
+ need_refresh = _gf_true;
out:
- return scaled_ino;
+ return need_refresh;
}
-
int
-afr_deitransform_orig (ino64_t ino, int child_count)
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
- int index = -1;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
- index = ino % child_count;
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
- return index;
+ return ret;
}
-
int
-afr_deitransform (ino64_t ino, int child_count)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
- return 0;
-}
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- local = frame->local;
+ if (!inode)
+ return ret;
- if (local->govinda_gOvinda && local->cont.lookup.inode) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to cancel split-brain choice timer.");
+ goto out;
+ }
+ ctx->spb_choice = -1;
+ if (ctx->timer) {
+ gf_timer_call_cancel(this->ctx, ctx->timer);
+ ctx->timer = NULL;
}
+ ret = 0;
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+void
+afr_set_split_brain_choice_cbk(void *data)
+{
+ inode_t *inode = data;
+ xlator_t *this = THIS;
- return 0;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_invalidate(inode);
+ inode_unref(inode);
+ return;
}
-
-static void
-afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
+int
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ inode_t *inode = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ afr_spbc_timeout_t *data = opaque;
+ struct timespec delta = {
+ 0,
+ };
+ gf_boolean_t timer_set = _gf_false;
+ gf_boolean_t timer_cancelled = _gf_false;
+ gf_boolean_t timer_reset = _gf_false;
+ int old_spb_choice = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+ priv = this->private;
+
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ delta.tv_sec = priv->spb_choice_timeout;
+ delta.tv_nsec = 0;
+
+ if (!loc->inode) {
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!(data->d_spb || data->m_spb)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Cannot set "
+ "replica.split-brain-choice on %s. File is"
+ " not in data/metadata split-brain.",
+ uuid_utoa(loc->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /*
+ * we're ref'ing the inode before LOCK like it is done elsewhere in the
+ * code. If we ref after LOCK, coverity complains of possible deadlocks.
+ */
+ inode = inode_ref(loc->inode);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to get inode_ctx for %s", loc->name);
+ goto post_unlock;
+ }
- if (afr_sh_has_metadata_pending (xattr, child_index, this)) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
+ old_spb_choice = ctx->spb_choice;
+ ctx->spb_choice = data->spb_child_index;
+
+ /* Possible changes in spb-choice :
+ * valid to -1 : cancel timer and unref
+ * valid to valid : cancel timer and inject new one
+ * -1 to -1 : unref and do not do anything
+ * -1 to valid : inject timer
+ */
+
+ /* ctx->timer is NULL iff previous value of
+ * ctx->spb_choice is -1
+ */
+ if (ctx->timer) {
+ if (ctx->spb_choice == -1) {
+ if (!gf_timer_call_cancel(this->ctx, ctx->timer)) {
+ ctx->timer = NULL;
+ timer_cancelled = _gf_true;
+ }
+ /* If timer cancel failed here it means that the
+ * previous cbk will be executed which will set
+ * spb_choice to -1. So we can consider the
+ * 'valid to -1' case to be a success
+ * (i.e. ret = 0) and goto unlock.
+ */
+ goto unlock;
+ }
+ goto reset_timer;
+ } else {
+ if (ctx->spb_choice == -1)
+ goto unlock;
+ goto set_timer;
}
- if (afr_sh_has_entry_pending (xattr, child_index, this)) {
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "entry self-heal is pending for %s.", local->loc.path);
+ reset_timer:
+ ret = gf_timer_call_cancel(this->ctx, ctx->timer);
+ if (ret != 0) {
+ /* We need to bail out now instead of launching a new
+ * timer. Otherwise the cbk of the previous timer event
+ * will cancel the new ctx->timer.
+ */
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = EAGAIN;
+ goto unlock;
+ }
+ ctx->timer = NULL;
+ timer_reset = _gf_true;
+
+ set_timer:
+ ctx->timer = gf_timer_call_after(this->ctx, delta,
+ afr_set_split_brain_choice_cbk, inode);
+ if (!ctx->timer) {
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = ENOMEM;
}
+ if (!timer_reset && ctx->timer)
+ timer_set = _gf_true;
+ if (timer_reset && !ctx->timer)
+ timer_cancelled = _gf_true;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!timer_set)
+ inode_unref(inode);
+ if (timer_cancelled)
+ inode_unref(inode);
+ /*
+ * We need to invalidate the inode to prevent the kernel from serving
+ * reads from an older cached value despite a change in spb_choice to
+ * a new value.
+ */
+ inode_invalidate(inode);
+out:
+ GF_FREE(data);
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
+}
- if (afr_sh_has_data_pending (xattr, child_index, this)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "data self-heal is pending for %s.", local->loc.path);
+int
+afr_accused_fill(xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type(type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr(xdata, priv->pending_key[i], &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy(pending, pending_raw, sizeof(pending));
+
+ if (ntoh32(pending[idx]))
+ accused[i] = 1;
+ }
+
+ return 0;
+}
+
+int
+afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ continue;
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
+
+ return 0;
+}
+
+int
+afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *data_accused, unsigned char *metadata_accused,
+ unsigned char *data_readable,
+ unsigned char *metadata_readable, struct afr_reply *replies)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+ int ret = 0;
+ ia_type_t ia_type = IA_INVAL;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK(priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies) { /* Lookup */
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
+ (replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ xdata = replies[i].xdata;
+ ia_type = replies[i].poststat.ia_type;
+ } else { /* pre-op xattrop */
+ xdata = local->transaction.changelog_xdata[i];
+ ia_type = inode->ia_type;
}
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
+ afr_accused_fill(this, xdata, data_accused,
+ (ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
+ : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill(this, xdata, metadata_accused,
+ AFR_METADATA_TRANSACTION);
+ }
+
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
+ /* We want to accuse small files only when we know for
+ * sure that there is no IO happening. Otherwise, the
+ * ia_sizes obtained in post-refresh replies may
+ * mismatch due to a race between inode-refresh and
+ * ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn(AFR_DATA_TRANSACTION, local, this)) {
+ afr_accuse_smallfiles(this, replies, data_accused);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+ return ret;
+}
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
+int
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ ret = afr_readables_fill(frame, this, inode, data_accused, metadata_accused,
+ data_readable, metadata_readable, replies);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (data_accused[i] || metadata_accused[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
+ afr_inode_read_subvol_set(inode, this, data_readable, metadata_readable,
+ event_generation);
+ return ret;
}
+int
+afr_refresh_selfheal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return 0;
+}
-static void
-afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local,
- struct iatt *buf, struct iatt *lookup_buf)
+int
+afr_inode_refresh_err(call_frame_t *frame, xlator_t *this)
{
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- */
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
+
+ err = afr_final_errno(local, priv);
+ret:
+ return err;
+}
- gf_log (this->name, GF_LOG_INFO,
- "filetype differs for %s ", local->loc.path);
+gf_boolean_t
+afr_selfheal_enabled(const xlator_t *this)
+{
+ const afr_private_t *priv = this->private;
+
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
+}
- local->govinda_gOvinda = 1;
+int
+afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+{
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto refresh_done;
}
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.need_metadata_self_heal = _gf_true;
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame) {
+ err = EIO;
+ goto refresh_done;
+ }
+ heal_local = heal_frame->local;
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = EIO;
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
}
+ heal_local->heal_frame = frame;
+ ret = synctask_new(this->ctx->env, afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk, heal_frame,
+ heal_frame);
+ return 0;
+ }
+
+refresh_done:
+ afr_local_replies_wipe(local, this->private);
+ local->refreshfn(frame, this, err);
+
+ return 0;
+}
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
- "ownership differs for %s ", local->loc.path);
+int
+afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
+{
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t start_heal = _gf_false;
+ afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
+ int ret = 0;
+
+ if (error != 0) {
+ goto refresh_done;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->thin_arbiter_count && local->is_read_txn &&
+ AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
}
+ error = EINVAL;
+ goto refresh_done;
+ }
+
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
+ ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
+
+ if (ret && afr_selfheal_enabled(this) && start_heal) {
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame)
+ goto refresh_done;
+ heal_local = heal_frame->local;
+ heal_local->refreshinode = inode_ref(local->refreshinode);
+ heal_local->heal_frame = heal_frame;
+ if (!afr_throttled_selfheal(heal_frame, this)) {
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ }
+
+refresh_done:
+ afr_txn_refresh_done(frame, this, error);
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
- "size differs for %s ", local->loc.path);
- local->self_heal.need_data_self_heal = _gf_true;
+ return 0;
+}
+
+void
+afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ dict_t *xdata, struct iatt *par)
+{
+ afr_local_t *local = NULL;
+ int call_child = (long)cookie;
+ int8_t need_heal = 1;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ if (par)
+ local->replies[call_child].postparent = *par;
+ if (xdata)
+ local->replies[call_child].xdata = dict_ref(xdata);
+ }
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
+ }
+
+ local->replies[call_child].need_heal = need_heal;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
}
+ afr_inode_refresh_done(frame, this, ret);
+ }
+}
+int
+afr_inode_refresh_subvol_with_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *par)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, par);
+ return 0;
}
+int
+afr_inode_refresh_subvol_with_lookup(call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, uuid_t gfid, dict_t *xdata)
+{
+ loc_t loc = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ loc.inode = inode;
+ if (gf_uuid_is_null(inode->gfid) && gfid) {
+ /* To handle setattr/setxattr on yet to be linked inode from
+ * dht */
+ gf_uuid_copy(loc.gfid, gfid);
+ } else {
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ }
+
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_lookup_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
+}
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf)
+int
+afr_inode_refresh_subvol_with_fstat_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- int unwind = 1;
- int source = -1;
- int up_count = 0;
- char sh_type_str[256] = {0,};
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, NULL);
+ return 0;
+}
- priv = this->private;
- local = frame->local;
+int
+afr_inode_refresh_subvol_with_fstat(call_frame_t *frame, xlator_t *this, int i,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino;
+ priv = this->private;
+ local = frame->local;
- if (local->cont.lookup.ino) {
- local->cont.lookup.buf.ia_ino = local->cont.lookup.ino;
- }
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_fstat_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fstat, local->fd, xdata);
+ return 0;
+}
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino) {
- local->cont.lookup.buf.ia_ino =
- local->cont.lookup.inode->ino;
- }
+int
+afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *wind_subvols = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0(priv->child_count);
+
+ afr_local_replies_wipe(local, priv);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ afr_inode_refresh_done(frame, this, EINVAL);
+ return 0;
}
- up_count = afr_up_children_count (priv->child_count, priv->child_up);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
+ }
- goto unwind;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ afr_inode_refresh_done(frame, this, ENOMEM);
+ return 0;
+ }
- if (local->success_count && local->enoent_count) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "entries are missing in lookup of %s.",
- local->loc.path);
- }
+ ret = afr_xattr_req_prepare(this, xdata);
+ if (ret != 0) {
+ dict_unref(xdata);
+ afr_inode_refresh_done(frame, this, -ret);
+ return 0;
+ }
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this, local->cont.lookup.inode)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Unable to set inodelk-dom-count in dict ");
+ }
+
+ if (local->fd) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] && fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ wind_subvols[i] = 1;
}
+ } else {
+ memcpy(wind_subvols, local->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ }
+
+ local->call_count = AFR_COUNT(wind_subvols, priv->child_count);
+
+ call_count = local->call_count;
+ if (!call_count) {
+ dict_unref(xdata);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done(frame, this, EBADFD);
+ else
+ afr_inode_refresh_done(frame, this, ENOTCONN);
+ return 0;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_subvols[i])
+ continue;
- if ((local->self_heal.need_metadata_self_heal
- || local->self_heal.need_data_self_heal
- || local->self_heal.need_entry_self_heal)
- && ((!local->cont.lookup.is_revalidate)
- || (local->op_ret != -1))) {
+ if (local->fd)
+ afr_inode_refresh_subvol_with_fstat(frame, this, i, xdata);
+ else
+ afr_inode_refresh_subvol_with_lookup(
+ frame, this, i, local->refreshinode, local->refreshgfid, xdata);
- if (local->inodelk_count || local->entrylk_count) {
+ if (!--call_count)
+ break;
+ }
- /* Someone else is doing self-heal on this file.
- So just make a best effort to set the read-subvolume
- and return */
+ dict_unref(xdata);
- if (IA_ISREG (local->cont.lookup.inode->ia_type)) {
- source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs);
+ return 0;
+}
- if (source >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- source);
- }
- }
- goto unwind;
- }
+int
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
+{
+ afr_local_t *local = NULL;
- if (!local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type =
- lookup_buf->ia_type;
- }
+ local = frame->local;
- local->self_heal.background = _gf_true;
- local->self_heal.type = local->cont.lookup.buf.ia_type;
- local->self_heal.unwind = afr_self_heal_lookup_unwind;
+ local->refreshfn = refreshfn;
- unwind = 0;
+ if (local->refreshinode) {
+ inode_unref(local->refreshinode);
+ local->refreshinode = NULL;
+ }
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
+ local->refreshinode = inode_ref(inode);
- gf_log (this->name, GF_LOG_INFO,
- "background %s self-heal triggered. path: %s",
- sh_type_str, local->loc.path);
+ if (gfid)
+ gf_uuid_copy(local->refreshgfid, gfid);
+ else
+ gf_uuid_clear(local->refreshgfid);
- afr_self_heal (frame, this);
- }
+ afr_inode_refresh_do(frame, this);
-unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
+ return 0;
}
+int
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
+ priv = this->private;
-static gf_boolean_t
-__error_more_important (int32_t old_errno, int32_t new_errno)
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64(xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Unable to set dict value for %s", priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64(xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "failed to set dirty "
+ "query flag");
+ }
+
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
+ }
+
+ return ret;
+}
+
+int
+afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
{
- gf_boolean_t ret = _gf_true;
+ int ret = -ENOMEM;
+
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ if (!local->xattr_req)
+ goto out;
+
+ if (xattr_req && (xattr_req != local->xattr_req))
+ dict_copy(xattr_req, local->xattr_req);
+
+ ret = afr_xattr_req_prepare(this, local->xattr_req);
+
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32(local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+int
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
+{
+ int i = 0;
+ int child = -1;
+ int64_t read_iter = -1;
+ int64_t pending_read = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
+ continue;
+ read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
+ if (child == -1 || read_iter < pending_read) {
+ pending_read = read_iter;
+ child = i;
+ }
+ }
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
+ return child;
+}
- return ret;
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
}
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
+}
int
-afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- child_index = (long) cookie;
- priv = this->private;
+ uuid_t gfid_copy = {
+ 0,
+ };
+ pid_t pid;
+ int child = -1;
+
+ switch (priv->hash_mode) {
+ case AFR_READ_POLICY_FIRST_UP:
+ break;
+ case AFR_READ_POLICY_GFID_HASH:
+ gf_uuid_copy(gfid_copy, args->gfid);
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_GFID_PID_HASH:
+ if (args->ia_type != IA_IFDIR) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and
+ * returns a constant-length value that's sure to be
+ * shorter than a UUID. It's still very unlikely to be
+ * the same across clients, so it still provides good
+ * mixing. We're not trying for perfection here. All we
+ * need is a low probability that multiple clients
+ * won't converge on the same subvolume.
+ */
+ gf_uuid_copy(gfid_copy, args->gfid);
+ pid = getpid();
+ *(pid_t *)gfid_copy ^= pid;
+ }
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
+ break;
+ }
+
+ return child;
+}
- LOCK (&frame->lock);
- {
- local = frame->local;
+int
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args)
+{
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t local_args = {
+ 0,
+ };
+
+ priv = this->private;
+
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
+
+ if (inode_is_linked(inode)) {
+ gf_uuid_copy(local_args.gfid, inode->gfid);
+ local_args.ia_type = inode->ia_type;
+ } else if (args) {
+ local_args = *args;
+ }
+
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child(&local_args, priv, readable);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
+
+ /* no readable subvolumes, either split brain or all subvols down */
+
+ return -1;
+}
- lookup_buf = &local->cont.lookup.buf;
+int
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ int ret = -1;
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get(inode, this, 0, readable, event_p);
+ else
+ ret = afr_inode_read_subvol_get(inode, this, readable, 0, event_p);
+ return ret;
+}
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+void
+afr_readables_intersect_get(inode_t *inode, xlator_t *this, int *event,
+ unsigned char *intersection)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *intersect = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ intersect = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ event);
+
+ AFR_INTERSECT(intersect, data_readable, metadata_readable,
+ priv->child_count);
+ if (intersection)
+ memcpy(intersection, intersect,
+ sizeof(*intersection) * priv->child_count);
+}
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+int
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
+
+ priv = this->private;
+
+ readable = alloca0(priv->child_count);
+ intersection = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_type_get(inode, this, readable, &event, type);
+
+ afr_readables_intersect_get(inode, this, &event, intersection);
+
+ if (AFR_COUNT(intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy(inode, this, intersection,
+ args);
+ else
+ subvol = afr_read_subvol_select_by_policy(inode, this, readable, args);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ if (readables)
+ memcpy(readables, readable, sizeof(*readables) * priv->child_count);
+ return subvol;
+}
- goto unlock;
- }
+void
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
- afr_lookup_collect_xattr (local, this, child_index, xattr);
+ priv = this->private;
- first_up_child = afr_first_up_child (priv);
+ afr_matrix_cleanup(local->pending, priv->child_count);
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- }
+ GF_FREE(local->internal_lock.lower_locked_nodes);
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- if (priv->first_lookup && inode->ino == 1) {
- gf_log (this->name, GF_LOG_INFO,
- "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
- }
-
- *lookup_buf = *buf;
-
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
-
- if (child_index == local->read_child_index) {
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
- }
+ afr_lockees_cleanup(&local->internal_lock);
- }
+ GF_FREE(local->transaction.pre_op);
- local->success_count++;
+ GF_FREE(local->transaction.pre_op_sources);
+ if (local->transaction.changelog_xdata) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ dict_unref(local->transaction.changelog_xdata[i]);
}
-unlock:
- UNLOCK (&frame->lock);
+ GF_FREE(local->transaction.changelog_xdata);
+ }
- call_count = afr_frame_return (frame);
+ GF_FREE(local->transaction.failed_subvols);
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
+ GF_FREE(local->transaction.basename);
+ GF_FREE(local->transaction.new_basename);
- return 0;
+ loc_wipe(&local->transaction.parent_loc);
+ loc_wipe(&local->transaction.new_parent_loc);
}
+void
+afr_reply_wipe(struct afr_reply *reply)
+{
+ if (reply->xdata) {
+ dict_unref(reply->xdata);
+ reply->xdata = NULL;
+ }
+
+ if (reply->xattr) {
+ dict_unref(reply->xattr);
+ reply->xattr = NULL;
+ }
+}
-int
-afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+void
+afr_replies_wipe(struct afr_reply *replies, int count)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
+ int i = 0;
- child_index = (long) cookie;
- priv = this->private;
+ for (i = 0; i < count; i++) {
+ afr_reply_wipe(&replies[i]);
+ }
+}
- LOCK (&frame->lock);
- {
- local = frame->local;
+void
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv)
+{
+ if (!local->replies)
+ return;
- lookup_buf = &local->cont.lookup.buf;
+ afr_replies_wipe(local->replies, priv->child_count);
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
+ memset(local->replies, 0, sizeof(*local->replies) * priv->child_count);
+}
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+static gf_boolean_t
+afr_fop_lock_is_unlock(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) &&
+ (local->cont.inodelk.in_cmd == F_SETLKW ||
+ local->cont.inodelk.in_cmd == F_SETLK))
+ return _gf_true;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+static gf_boolean_t
+afr_lk_is_unlock(int32_t cmd, struct gf_flock *flock)
+{
+ switch (cmd) {
+ case F_RESLK_UNLCK:
+ return _gf_true;
+ break;
- goto unlock;
- }
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
- afr_lookup_collect_xattr (local, this, child_index, xattr);
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ if (F_UNLCK == flock->l_type)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
- first_up_child = afr_first_up_child (priv);
+void
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- }
+ if (!frame || !frame->this || !frame->local || !frame->this->private)
+ return;
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
+ if (*op_ret < 0)
+ return;
- /* inode number should be preserved across revalidates */
+ /* Failing inodelk/entrylk/lk here is not a good idea because we
+ * need to cleanup the locks on the other bricks if we choose to fail
+ * the fop here. The brick may go down just after unwind happens as well
+ * so anyways the fop will fail when the next fop is sent so leaving
+ * it like this for now.*/
+ local = frame->local;
+ switch (local->op) {
+ case GF_FOP_LOOKUP:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LK:
+ return;
+ default:
+ break;
+ }
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
+ priv = frame->this->private;
+ if (!priv->consistent_io)
+ return;
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+ if (local->event_generation &&
+ (local->event_generation != priv->event_generation))
+ goto inconsistent;
- *lookup_buf = *buf;
+ return;
+inconsistent:
+ *op_ret = -1;
+ *op_errno = ENOTCONN;
+}
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
+void
+afr_local_cleanup(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
+ if (!local)
+ return;
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
+ syncbarrier_destroy(&local->barrier);
- if (child_index == local->read_child_index) {
+ afr_local_transaction_cleanup(local, this);
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
+ priv = this->private;
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+ if (local->fd)
+ fd_unref(local->fd);
- *lookup_buf = *buf;
- }
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- }
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (local->dict)
+ dict_unref(local->dict);
- call_count = afr_frame_return (frame);
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->replies);
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
+ GF_FREE(local->child_up);
- return 0;
-}
+ GF_FREE(local->read_attempted);
+
+ GF_FREE(local->readable);
+ GF_FREE(local->readable2);
+
+ if (local->inode)
+ inode_unref(local->inode);
+ if (local->parent)
+ inode_unref(local->parent);
+
+ if (local->parent2)
+ inode_unref(local->parent2);
+
+ if (local->refreshinode)
+ inode_unref(local->refreshinode);
+
+ { /* getxattr */
+ GF_FREE(local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref(local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref(local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref(local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref(local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref(local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE(local->cont.writev.vector);
+ if (local->cont.writev.iobref)
+ iobref_unref(local->cont.writev.iobref);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref(local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref(local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE(local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref(local->cont.xattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE(local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE(local->cont.opendir.checksum);
+ }
+
+ { /* open */
+ if (local->cont.open.fd)
+ fd_unref(local->cont.open.fd);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref(local->cont.readdir.dict);
+ }
+
+ { /* inodelk */
+ GF_FREE(local->cont.inodelk.volume);
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ }
+
+ { /* entrylk */
+ GF_FREE(local->cont.entrylk.volume);
+ GF_FREE(local->cont.entrylk.basename);
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ }
+
+ if (local->xdata_req)
+ dict_unref(local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+}
int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+afr_frame_return(call_frame_t *frame)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- fop_lookup_cbk_t callback = NULL;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ int call_count = 0;
- priv = this->private;
+ local = frame->local;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ LOCK(&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- local->op_ret = -1;
+ return call_count;
+}
- frame->local = local;
+static char *afr_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
- goto out;
- }
+gf_boolean_t
+afr_is_xattr_ignorable(char *key)
+{
+ int i = 0;
+
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)))
+ return _gf_true;
+ for (i = 0; afr_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, afr_ignore_xattrs[i]))
+ return _gf_true;
+ }
+ return _gf_false;
+}
- loc_copy (&local->loc, loc);
+static gf_boolean_t
+afr_xattr_match_needed(dict_t *this, char *key1, data_t *value1, void *data)
+{
+ /* Ignore all non-disk (i.e. virtual) xattrs right away. */
+ if (!gf_is_valid_xattr_namespace(key1))
+ return _gf_false;
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ /* Ignore on-disk xattrs that AFR doesn't need to heal. */
+ if (!afr_is_xattr_ignorable(key1))
+ return _gf_true;
- callback = afr_revalidate_lookup_cbk;
+ return _gf_false;
+}
- local->cont.lookup.is_revalidate = _gf_true;
- local->read_child_index = afr_read_child (this,
- loc->inode);
- } else {
- callback = afr_fresh_lookup_cbk;
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2)
+{
+ return are_dicts_equal(dict1, dict2, afr_xattr_match_needed, NULL);
+}
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
- }
+static int
+afr_get_parent_read_subvol(xlator_t *this, inode_t *parent,
+ struct afr_reply *replies, unsigned char *readable)
+{
+ int i = 0;
+ int par_read_subvol = -1;
+ int par_read_subvol_iter = -1;
+ afr_private_t *priv = NULL;
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
+ priv = this->private;
- local->child_up = memdup (priv->child_up, priv->child_count);
+ if (parent)
+ par_read_subvol = afr_data_subvol_get(parent, this, NULL, NULL, NULL,
+ NULL);
- local->cont.lookup.xattrs = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- call_count = local->call_count;
+ if (replies[i].op_ret < 0)
+ continue;
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
+ if (par_read_subvol_iter == -1) {
+ par_read_subvol_iter = i;
+ continue;
}
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
+ if ((par_read_subvol_iter != par_read_subvol) && readable[i])
+ par_read_subvol_iter = i;
+
+ if (i == par_read_subvol)
+ par_read_subvol_iter = i;
+ }
+ /* At the end of the for-loop, the only reason why @par_read_subvol_iter
+ * could be -1 is when this LOOKUP has failed on all sub-volumes.
+ * So it is okay to send an arbitrary subvolume (0 in this case)
+ * as parent read subvol.
+ */
+ if (par_read_subvol_iter == -1)
+ par_read_subvol_iter = 0;
+
+ return par_read_subvol_iter;
+}
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
+int
+afr_read_subvol_decide(inode_t *inode, xlator_t *this,
+ afr_read_subvol_args_t *args, unsigned char *readable)
+{
+ int event = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *intersection = NULL;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
+ priv = this->private;
+ intersection = alloca0(priv->child_count);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
+ afr_readables_intersect_get(inode, this, &event, intersection);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ if (AFR_COUNT(intersection, priv->child_count) <= 0) {
+ /* TODO: If we have one brick with valid data_readable and
+ * another with metadata_readable, try to send an iatt with
+ * valid bits from both.*/
+ return -1;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, callback, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- if (!--call_count)
- break;
- }
- }
+ memcpy(readable, intersection, sizeof(*readable) * priv->child_count);
- ret = 0;
-out:
- if (ret == -1)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ return afr_read_subvol_select_by_policy(inode, this, intersection, args);
+}
- return 0;
+static inline int
+afr_first_up_child(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ return i;
+ return -1;
}
+static void
+afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
+ unsigned char *success_replies,
+ unsigned char *data_readable, int *read_subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = -1;
+ int child_count = -1;
-/* {{{ open */
+ if (*read_subvol != -1)
+ return;
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+ priv = this->private;
+ local = frame->local;
+ child_count = priv->child_count;
+
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
+ *read_subvol = afr_first_up_child(frame, this);
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
+ /* read_subvol is guaranteed to be valid if we hit this path. */
+ *read_subvol = afr_first_up_child(frame, this);
+ } else {
+ /* If quorum is enabled and we do not have a
+ readable yet, it means all good copies are down.
+ */
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "no read "
+ "subvols for %s",
+ local->loc.path);
+ }
+ if (*read_subvol >= 0)
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
+}
+
+static void
+afr_lookup_done(call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- int ret = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ int par_read_subvol = 0;
+ int ret = -1;
+ unsigned char *readable = NULL;
+ unsigned char *success_replies = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {
+ 0,
+ };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+ char *gfid_heal_msg = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+ parent = local->loc.parent;
+
+ locked_entry = afr_is_possibly_under_txn(AFR_ENTRY_TRANSACTION, local,
+ this);
+
+ readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(parent, this, readable, NULL, &event);
+ par_read_subvol = afr_get_parent_read_subvol(this, parent, replies,
+ readable);
+
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
+ issued
+ */
+ if (local->cont.lookup.needs_fresh_lookup) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto error;
+ }
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ op_errno = afr_final_errno(frame->local, this->private);
+ local->op_errno = op_errno;
- priv = this->private;
+ read_subvol = -1;
+ afr_fill_success_replies(local, priv, success_replies);
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (ret == 0)
- goto unlock;
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
+ continue;
+ }
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto unlock;
- }
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ gf_uuid_copy(read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
+ local->op_ret = 0;
+ }
+ }
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto unlock;
- }
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
+ if (read_subvol == -1)
+ goto error;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ continue;
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid, read_gfid))
+ continue;
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ can_interpret = _gf_false;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ if (locked_entry)
+ continue;
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
+ /* If we were called from glfsheal and there is still a gfid
+ * mismatch, succeed the lookup and let glfsheal print the
+ * response via gfid-heal-msg.*/
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
+ goto cant_interpret;
- INIT_LIST_HEAD (&fd_ctx->entries);
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto error;
+ }
+
+ /* Forth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ read_subvol = -1;
+ memset(readable, 0, sizeof(*readable) * priv->child_count);
+ if (can_interpret) {
+ if (!afr_has_quorum(success_replies, this, NULL))
+ goto cant_interpret;
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ gf_uuid_copy(args.gfid, read_gfid);
+ args.ia_type = ia_type;
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ read_subvol = afr_read_subvol_decide(local->inode, this, &args,
+ readable);
+ if (read_subvol == -1)
+ goto cant_interpret;
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
-unlock:
- UNLOCK (&fd->lock);
-out:
- return ret;
+ } else {
+ cant_interpret:
+ afr_attempt_readsubvol_set(frame, this, success_replies, readable,
+ &read_subvol);
+ if (read_subvol == -1) {
+ goto error;
+ }
+ }
+
+ afr_handle_quota_size(frame, this);
+
+ afr_set_need_heal(this, local);
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ goto error;
+ }
+
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-heal-msg dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[par_read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
-/* {{{ flush */
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
+ */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_higher_errno(int32_t old_errno, int32_t new_errno)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
+
+ return new_errno;
+}
- local = frame->local;
+int
+afr_final_errno(afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret >= 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ }
+
+ return op_errno;
+}
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+static int32_t
+afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
+
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ priv->local[child_index] = 1;
+ /* Don't set arbiter as read child. */
+ if (AFR_IS_ARBITER_BRICK(priv, child_index))
+ goto out;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_LOCAL_CHILD,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+
+ priv->read_child = child_index;
+ }
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
- }
+static void
+afr_attempt_local_discovery(xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this, this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
- return 0;
+ tmploc.gfid[sizeof(tmploc.gfid) - 1] = 1;
+ STACK_WIND_COOKIE(newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index, priv->children[child_index],
+ priv->children[child_index]->fops->getxattr, &tmploc,
+ GF_XATTR_PATHINFO_KEY, NULL);
}
-
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_lookup_sh_metadata_wrap(void *opaque)
+{
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ replies = local->replies;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ first = i;
+ break;
+ }
+ if (first == -1)
+ goto out;
+
+ if (afr_selfheal_metadata_by_stbuf(this, &replies[first].poststat))
+ goto out;
+
+ afr_local_replies_wipe(local, this->private);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ if (loc_is_nameless(&local->loc)) {
+ ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
+ local->loc.gfid, local->replies,
+ local->child_up, dict);
+ } else {
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, dict);
+ }
+ if (inode)
+ inode_unref(inode);
+out:
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
+
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+gf_boolean_t
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
+
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+ }
- local->op_errno = op_errno;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
+
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t start = _gf_false;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ if (!priv->metadata_self_heal)
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (first == -1) {
+ first = i;
+ stbuf = replies[i].poststat;
+ continue;
}
- UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
+ if (afr_is_pending_set(this, replies[i].xdata,
+ AFR_METADATA_TRANSACTION)) {
+ /* Let shd do the heal so that lookup is not blocked
+ * on getting metadata lock/doing the heal */
+ start = _gf_false;
+ break;
+ }
- call_count = afr_frame_return (frame);
+ if (gf_uuid_compare(stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
+ start = _gf_false;
+ break;
+ }
+ if (!IA_EQUAL(stbuf, replies[i].poststat, type)) {
+ start = _gf_false;
+ break;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ /*Check if iattrs need heal*/
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot))) {
+ start = _gf_true;
+ continue;
}
- return 0;
+ /*Check if xattrs need heal*/
+ if (!afr_xattrs_are_equal(replies[first].xdata, replies[i].xdata))
+ start = _gf_true;
+ }
+
+ return start;
}
+int
+afr_lookup_metadata_heal_check(call_frame_t *frame, xlator_t *this)
+
+{
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!afr_can_start_metadata_self_heal(frame, this))
+ goto out;
+
+ heal = afr_frame_create(this, &ret);
+ if (!heal) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_lookup_sh_metadata_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto out;
+ return ret;
+out:
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return ret;
+}
int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+afr_lookup_selfheal_wrap(void *opaque)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int ret = 0;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ uuid_t pargfid = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ loc_pargfid(&local->loc, pargfid);
+
+ ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
+ &local->cont.lookup.gfid_req, local->xattr_req);
+ if (ret == -EIO)
+ goto unwind;
+
+ afr_local_replies_wipe(local, this->private);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, local->xattr_req);
+ if (inode)
+ inode_unref(inode);
+
+ afr_lookup_metadata_heal_check(frame, this);
+ return 0;
- local = frame->local;
- priv = this->private;
+unwind:
+ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t name_state_mismatch = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+ unsigned char *par_readables = NULL;
+ unsigned char *success = NULL;
+ int32_t op_errno = 0;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+ par_readables = alloca0(priv->child_count);
+ success = alloca0(priv->child_count);
+
+ ret = afr_inode_read_subvol_get(local->loc.parent, this, par_readables,
+ NULL, NULL);
+ if (ret < 0 || AFR_COUNT(par_readables, priv->child_count) == 0) {
+ /* In this case set par_readables to all 1 so that name_heal
+ * need checks at the end of this function will flag missing
+ * entry when name state mismatches*/
+ memset(par_readables, 1, priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == 0) {
+ if (gf_uuid_is_null(gfid)) {
+ gf_uuid_copy(gfid, replies[i].poststat.ia_gfid);
+ }
+ success[i] = 1;
+ } else {
+ if ((replies[i].op_errno != ENOTCONN) &&
+ (replies[i].op_errno != ENOENT) &&
+ (replies[i].op_errno != ESTALE)) {
+ op_errno = replies[i].op_errno;
+ }
+ }
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ /*gfid is missing, needs heal*/
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ goto name_heal;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
+ if (first == -1) {
+ first = i;
+ continue;
}
- local->call_count = call_count;
+ if (replies[i].op_ret != replies[first].op_ret) {
+ name_state_mismatch = _gf_true;
+ }
+ if (replies[i].op_ret == 0) {
+ /* Rename after this lookup may succeed if we don't do
+ * a name-heal and the destination may not have pending xattrs
+ * to indicate which name is good and which is bad so always do
+ * this heal*/
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid, gfid)) {
+ goto name_heal;
+ }
+ }
+ }
+
+ if (name_state_mismatch) {
+ if (!priv->quorum_count)
+ goto name_heal;
+ if (!afr_has_quorum(success, this, NULL))
+ goto name_heal;
+ if (op_errno)
+ goto name_heal;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd);
-
- if (!--call_count)
- break;
- }
+ if (!replies[i].valid)
+ continue;
+ if (par_readables[i] && replies[i].op_ret < 0 &&
+ replies[i].op_errno != ENOTCONN) {
+ goto name_heal;
+ }
}
+ }
- return 0;
-}
+ goto metadata_heal;
+name_heal:
+ heal = afr_frame_create(this, NULL);
+ if (!heal)
+ goto metadata_heal;
+
+ ret = synctask_new(this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret) {
+ AFR_STACK_DESTROY(heal);
+ goto metadata_heal;
+ }
+ return ret;
+
+metadata_heal:
+ ret = afr_lookup_metadata_heal_check(frame, this);
+
+ return ret;
+}
int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
+afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ /*
+ * On revalidate lookup if the gfid-changed, afr should unwind the fop
+ * with ESTALE so that a fresh lookup will be sent by the top xlator.
+ * So remember it.
+ */
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_entry_heal(frame, this);
+ }
+
+ return 0;
+}
- local = frame->local;
+static void
+afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ data_readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) > 0)
+ local->op_ret = 0;
+
+ if (local->op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
+ }
- local->transaction.unwind (frame, this);
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
- AFR_STACK_DESTROY (frame);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
- return 0;
-}
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
+unwind:
+ afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
+ &read_subvol);
+ if (read_subvol == -1)
+ goto error;
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ }
+
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
+}
+
+static int
+afr_ta_id_file_check(void *opaque)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ this = opaque;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, &stbuf,
+ 0, 0, 0);
+ if (ret == 0) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ fd = fd_create(loc.inode, getpid());
+ if (!fd)
+ goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
+ ret = syncop_create(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ O_RDWR, 0664, fd, &stbuf, dict, NULL);
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+out:
+ if (ret == 0) {
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to lookup/create thin-arbiter id file.");
+ }
+ if (dict)
+ dict_unref(dict);
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+
+ return 0;
+}
- priv = this->private;
+static int
+afr_ta_id_file_check_cbk(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ return 0;
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->thin_arbiter_count)
+ goto unwind;
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto unwind;
+
+ ret = synctask_new(this->ctx->env, afr_ta_id_file_check,
+ afr_ta_id_file_check_cbk, NULL, this);
+ if (ret)
+ goto unwind;
+unwind:
+ afr_discover_unwind(frame, this);
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+int
+afr_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery(this, child_index);
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_metadata_heal_check(frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_discover_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_discover_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
}
+ }
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
+int
+afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (__is_root_gfid(loc->inode->gfid)) {
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref(loc->inode);
+
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
}
+ }
- transaction_frame->local = local;
+ local->op = GF_FOP_LOOKUP;
- local->op = GF_FOP_FLUSH;
+ loc_copy(&local->loc, loc);
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
+ local->inode = inode_ref(loc->inode);
- local->fd = fd_ref (fd);
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ afr_discover_do(frame, this, 0);
+ return 0;
+ }
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+ afr_discover_do(frame, this, 0);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
+int
+afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err < 0) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_lookup_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
}
-
- return 0;
+ }
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-/* }}} */
-
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element getting looked up is.
+ * i.e what is the GFID, inode type and a conservative estimate of the
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief goal of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
+ int ret = 0;
+
+ if (loc_is_nameless(loc)) {
+ if (xattr_req)
+ dict_del_sizen(xattr_req, "gfid-req");
+ afr_discover(frame, this, loc, xattr_req);
+ return 0;
+ }
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ local->op = GF_FOP_LOOKUP;
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ loc_copy(&local->loc, loc);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
+ local->inode = inode_ref(loc->inode);
- GF_FREE (fd_ctx);
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
+ &local->cont.lookup.gfid_req);
+ if (ret == 0) {
+ dict_del_sizen(local->xattr_req, "gfid-req");
}
+ }
+
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+ afr_lookup_do(frame, this, 0);
+ return 0;
out:
- return 0;
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ return 0;
}
+void
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
+{
+ afr_private_t *priv = this->private;
+
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
+ GF_FREE(fd_ctx->opened_on);
+ GF_FREE(fd_ctx);
+ return;
+}
int
-afr_release (xlator_t *this, fd_t *fd)
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
- priv = this->private;
+ ret = fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- afr_cleanup_fd_ctx (this, fd);
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
+ if (fd_ctx) {
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ }
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
+out:
+ return 0;
+}
- }
+int
+afr_release(xlator_t *this, fd_t *fd)
+{
+ afr_cleanup_fd_ctx(this, fd);
- return 0;
+ return 0;
}
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ ret = __fd_ctx_get(fd, this, &ctx);
-/* {{{ fsync */
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set(this, fd);
+ if (ret < 0)
+ goto out;
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ ret = __fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
+out:
+ return fd_ctx;
+}
+
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local = frame->local;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ }
+ UNLOCK(&fd->lock);
- read_child = afr_read_child (this, local->fd->inode);
+ return fd_ctx;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+int
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+
+ ret = __fd_ctx_get(fd, this, &ctx);
+
+ if (ret == 0)
+ goto out;
+
+ fd_ctx = GF_CALLOC(1, sizeof(afr_fd_ctx_t), gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->opened_on = GF_CALLOC(sizeof(*fd_ctx->opened_on), priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous(fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- if (op_ret == 0) {
- local->op_ret = 0;
+ fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
+ if (ret)
+ gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
+out:
+ if (ret && fd_ctx)
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ return ret;
+}
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+/* {{{ flush */
- local->success_count++;
- }
+int
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
- local->op_errno = op_errno;
+ LOCK(&frame->lock);
+ {
+ if (op_ret != -1) {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- if (call_count == 0) {
- local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino;
- local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino;
+ return 0;
+}
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+static int
+afr_flush_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_flush_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->flush,
+ local->fd, xdata);
+ if (!--call_count)
+ break;
}
+ }
- return 0;
+ return 0;
+}
+
+afr_local_t *
+afr_wakeup_same_fd_delayed_op(xlator_t *this, afr_lock_t *lock, fd_t *fd)
+{
+ afr_local_t *local = NULL;
+
+ if (lock->delay_timer) {
+ local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (fd == local->fd) {
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ local = NULL;
+ } else {
+ lock->delay_timer = NULL;
+ }
+ } else {
+ local = NULL;
+ }
+ }
+
+ return local;
}
+void
+afr_delayed_changelog_wake_resume(xlator_t *this, inode_t *inode,
+ call_stub_t *stub)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_local_t *metadata_local = NULL;
+ afr_local_t *data_local = NULL;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
+ lock = &ctx->lock[AFR_DATA_TRANSACTION];
+ data_local = afr_wakeup_same_fd_delayed_op(this, lock, stub->args.fd);
+ lock = &ctx->lock[AFR_METADATA_TRANSACTION];
+ metadata_local = afr_wakeup_same_fd_delayed_op(this, lock,
+ stub->args.fd);
+ }
+ UNLOCK(&inode->lock);
+
+ if (data_local) {
+ data_local->transaction.resume_stub = stub;
+ } else if (metadata_local) {
+ metadata_local->transaction.resume_stub = stub;
+ } else {
+ call_resume(stub);
+ }
+ if (data_local) {
+ afr_delayed_changelog_wake_up_cbk(data_local);
+ }
+ if (metadata_local) {
+ afr_delayed_changelog_wake_up_cbk(metadata_local);
+ }
+}
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- priv = this->private;
+ local->op = GF_FOP_FLUSH;
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->fd = fd_ref(fd);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ stub = fop_flush_stub(frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
+ goto out;
+
+ afr_delayed_changelog_wake_resume(this, fd->inode, stub);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int
+afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- call_count = local->call_count;
- frame->local = local;
+ if (call_count == 0)
+ AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync);
- if (!--call_count)
- break;
- }
+int
+afr_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FSYNCDIR;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND(frame, afr_fsyncdir_cbk, priv->children[i],
+ priv->children[i]->fops->fsyncdir, fd, datasync, xdata);
+ if (!--call_count)
+ break;
}
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
- return 0;
+ AFR_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
+
+ return 0;
}
/* }}} */
-/* {{{ fsync */
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this);
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+static gf_boolean_t
+afr_is_conflicting_lock_present(int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ if (op_ret == -1 && op_errno == EAGAIN)
+ return _gf_true;
+ return _gf_false;
+}
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+static void
+afr_fop_lock_unwind(call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ switch (op) {
+ case GF_FOP_INODELK:
+ AFR_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FINODELK:
+ AFR_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ AFR_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ AFR_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata);
+ break;
+ default:
+ break;
+ }
+}
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+static void
+afr_fop_lock_wind(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t (*lock_cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *))
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = child_index;
+
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->inodelk,
+ (const char *)local->cont.inodelk.volume, &local->loc,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->finodelk,
+ (const char *)local->cont.inodelk.volume, local->fd,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->entrylk, local->cont.entrylk.volume,
+ &local->loc, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fentrylk, local->cont.entrylk.volume,
+ local->fd, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ default:
+ break;
+ }
+}
- call_count = afr_frame_return (frame);
+void
+afr_fop_lock_proceed(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local = frame->local;
+ priv = frame->this->private;
- return 0;
+ if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return;
+ }
+ /* At least one child is up */
+ /*
+ * Non-blocking locks also need to be serialized. Otherwise there is
+ * a chance that both the mounts which issued same non-blocking inodelk
+ * may endup not acquiring the lock on any-brick.
+ * Ex: Mount1 and Mount2
+ * request for full length lock on file f1. Mount1 afr may acquire the
+ * partial lock on brick-1 and may not acquire the lock on brick-2
+ * because Mount2 already got the lock on brick-2, vice versa. Since
+ * both the mounts only got partial locks, afr treats them as failure in
+ * gaining the locks and unwinds with EAGAIN errno.
+ */
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+ afr_local_replies_wipe(local, priv);
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+ local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
+ }
+ afr_serialized_lock_wind(frame, frame->this);
}
+static int32_t
+afr_unlock_partial_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ if (local->fd)
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ else
+ loc_gfid(&local->loc, gfid);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "%s: Failed to unlock %s on %s "
+ "with lk_owner: %s",
+ uuid_utoa(gfid), gf_fop_list[local->op],
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_proceed(frame);
+
+ return 0;
+}
- priv = this->private;
+static int32_t
+afr_unlock_locks_and_proceed(call_frame_t *frame, xlator_t *this,
+ int call_count)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ if (call_count == 0) {
+ afr_fop_lock_proceed(frame);
+ goto out;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ local->call_count = call_count;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+ local->cont.inodelk.cmd = F_SETLK;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ break;
+ default:
+ break;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (local->replies[i].op_ret == -1)
+ continue;
- call_count = local->call_count;
- frame->local = local;
+ afr_fop_lock_wind(frame, this, i, afr_unlock_partial_lock_cbk);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
+ if (!--call_count)
+ break;
+ }
- op_ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ xattrop */
-
int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ int i = 0;
+ int lock_count = 0;
+ unsigned char *success = NULL;
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ local = frame->local;
+ priv = this->private;
+ success = alloca0(priv->child_count);
- local->op_errno = op_errno;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret == 0) {
+ lock_count++;
+ success[i] = 1;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (local->op_ret == -1 && local->op_errno == EAGAIN)
+ continue;
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ if ((local->replies[i].op_ret == -1) &&
+ (local->replies[i].op_errno == EAGAIN)) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+ continue;
+ }
- return 0;
-}
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
+ local->op_errno = local->replies[i].op_errno;
+ }
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ if (afr_fop_lock_is_unlock(frame))
+ goto unwind;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
+ local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else {
+ goto unwind;
+ }
+
+ return 0;
+unwind:
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- priv = this->private;
+static int
+afr_common_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret == 0 && xdata) {
+ local->replies[child_index].xdata = dict_ref(xdata);
+ LOCK(&frame->lock);
+ {
+ if (!local->xdata_rsp)
+ local->xdata_rsp = dict_ref(xdata);
}
+ UNLOCK(&frame->lock);
+ }
+ return 0;
+}
- call_count = local->call_count;
- frame->local = local;
+static int32_t
+afr_serialized_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr);
- if (!--call_count)
- break;
- }
- }
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int next_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+
+ for (next_child = child_index + 1; next_child < priv->child_count;
+ next_child++) {
+ if (local->child_up[next_child])
+ break;
+ }
+
+ if (afr_is_conflicting_lock_present(op_ret, op_errno) ||
+ (next_child == priv->child_count)) {
+ afr_fop_lock_done(frame, this);
+ } else {
+ afr_fop_lock_wind(frame, this, next_child, afr_serialized_lock_cbk);
+ }
+
+ return 0;
+}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ afr_fop_lock_wind(frame, this, i, afr_serialized_lock_cbk);
+ break;
}
- return 0;
+ }
+ return 0;
}
-/* }}} */
+static int32_t
+afr_parallel_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-/* {{{ fxattrop */
+{
+ int call_count = 0;
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_done(frame, this);
+
+ return 0;
+}
+
+static int
+afr_parallel_lock_wind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ afr_fop_lock_wind(frame, this, i, afr_parallel_lock_cbk);
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
- int call_count = -1;
+static int
+afr_fop_handle_lock(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ int op_errno = 0;
- local = frame->local;
+ if (!afr_fop_lock_is_unlock(frame)) {
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = F_SETLK;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_LOCK_NB;
+ break;
+ default:
+ break;
+ }
+ }
- local->op_errno = op_errno;
+ if (local->xdata_req) {
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
}
- UNLOCK (&frame->lock);
+ }
- call_count = afr_frame_return (frame);
+ local->fop_lock_state = AFR_FOP_LOCK_PARALLEL;
+ afr_parallel_lock_wind(frame, this);
+out:
+ return -op_errno;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+static int32_t
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+
+ local->cont.inodelk.volume = gf_strdup(volume);
+ if (!local->cont.inodelk.volume) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->cont.inodelk.in_cmd = cmd;
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.in_flock = *flock;
+ local->cont.inodelk.flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+int32_t
+afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
+ return 0;
+}
int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+static int
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+ local->cont.entrylk.cmd = cmd;
+ local->cont.entrylk.in_cmd = cmd;
+ local->cont.entrylk.type = type;
+ local->cont.entrylk.volume = gf_strdup(volume);
+ local->cont.entrylk.basename = gf_strdup(basename);
+ if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
+ return 0;
+}
- priv = this->private;
+int
+afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
+ return 0;
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+int
+afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
+ return 0;
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+int
+afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ struct statvfs *buf = NULL;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret != 0) {
+ local->op_errno = op_errno;
+ goto unlock;
}
- call_count = local->call_count;
- frame->local = local;
+ local->op_ret = op_ret;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
- if (!--call_count)
- break;
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+unlock:
+ call_count = --local->call_count;
+ UNLOCK(&frame->lock);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
- return 0;
+ if (call_count == 0)
+ AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, local->xdata_rsp);
+
+ return 0;
}
-/* }}} */
+int
+afr_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_STATFS;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
+ local->call_count--;
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ STACK_WIND(frame, afr_statfs_cbk, priv->children[i],
+ priv->children[i]->fops->statfs, loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+out:
+ AFR_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int call_count = -1;
+ int child_index = (long)cookie;
+
+ local = frame->local;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ }
+
+ return 0;
+}
+int32_t
+afr_lk_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ call_count = afr_locked_nodes_count(local->cont.lk.locked_nodes,
+ priv->child_count);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ local->call_count = call_count;
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->cont.lk.user_flock.l_type = F_UNLCK;
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND_COOKIE(frame, afr_lk_unlock_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+}
int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ child_index = (long)cookie;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_lk_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- call_count = local->call_count;
- frame->local = local;
+ afr_lk_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
- return 0;
+ return 0;
}
+int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- local = frame->local;
+ return 0;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
- local->op_errno = op_errno;
+int
+afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int i = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_LK;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
+
+ local->cont.lk.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref(fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
- UNLOCK (&frame->lock);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
+ priv->children[i]->fops->lk, fd, cmd, flock,
+ local->xdata_req);
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+int32_t
+afr_lease_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return(frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, lease,
+ xdata);
+
+ return 0;
+}
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+afr_lease_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ call_count = afr_locked_nodes_count(local->cont.lease.locked_nodes,
+ priv->child_count);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ return 0;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->call_count = call_count;
- call_count = local->call_count;
- frame->local = local;
+ local->cont.lease.user_lease.cmd = GF_UNLK_LEASE;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lease.locked_nodes[i]) {
+ STACK_WIND(frame, afr_lease_unlock_cbk, priv->children[i],
+ priv->children[i]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, NULL);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ if (!--call_count)
+ break;
}
- return 0;
-}
+ }
+ return 0;
+}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ child_index = (long)cookie;
+
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+
+ afr_lease_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lease.locked_nodes[child_index] = 1;
+ local->cont.lease.ret_lease = *lease;
+ }
+
+ child_index++;
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, xdata);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+
+ afr_lease_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ }
+ return 0;
+}
+
+int
+afr_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
- local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->op = GF_FOP_LEASE;
+ local->cont.lease.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lease.locked_nodes),
+ gf_afr_mt_char);
- call_count = afr_frame_return (frame);
+ if (!local->cont.lease.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ loc_copy(&local->loc, loc);
+ local->cont.lease.user_lease = *lease;
+ local->cont.lease.ret_lease = *lease;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)0, priv->children[0],
+ priv->children[0]->fops->lease, loc, lease, xdata);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
+int
+afr_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = 0;
+ gf_boolean_t failed = _gf_false;
+ gf_boolean_t succeeded = _gf_false;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ call_count = afr_frame_return(frame);
+ if (call_count)
+ goto out;
+ /* If any of the subvolumes failed with other than ENOTCONN
+ * return error else return success unless all the subvolumes
+ * failed.
+ * TODO: In case of failure, we need to unregister the xattrs
+ * from the other subvolumes where it succeeded (once upcall
+ * fixes the Bz-1371622)*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno != ENOTCONN) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ if (local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ failed = _gf_true;
+ break;
+ }
+ if (local->replies[i].op_ret == 0) {
+ succeeded = _gf_true;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ if (!local->xdata_rsp && local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+ }
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ if (!succeeded && !failed) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
+
+ AFR_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+
+out:
+ return 0;
+}
+
+int
+afr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_cnt = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- priv = this->private;
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ VALIDATE_OR_GOTO(this->private, err);
+ priv = this->private;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto err;
- call_count = local->call_count;
- frame->local = local;
+ call_cnt = local->call_count;
+ if (xdata) {
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
+ if (dict_set_int8(xdata, priv->pending_key[i], 0) < 0)
+ goto err;
}
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ STACK_WIND_COOKIE(frame, afr_ipc_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->ipc, op,
+ xdata);
+ if (!--call_cnt)
+ break;
+ }
+ return 0;
+
+err:
+ if (op_errno == -1)
+ op_errno = errno;
+ AFR_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
+
+ return 0;
+
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
+int
+afr_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_int = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
return 0;
+
+ ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ afr_inode_ctx_destroy(ctx);
+ return 0;
}
+int
+afr_priv_dump(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf(key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf(key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ sprintf(key, "pending_reads[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64,
+ GF_ATOMIC_GET(priv->pending_reads[i]));
+ sprintf(key, "child_latency[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+ gf_proc_dump_write("heal-wait-queue-length", "%d", priv->heal_wait_qlen);
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+ gf_proc_dump_write("background-self-heal-count", "%d",
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
+ gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ gf_proc_dump_write("quorum-type", "auto");
+ } else if (priv->quorum_count == 0) {
+ gf_proc_dump_write("quorum-type", "none");
+ } else {
+ gf_proc_dump_write("quorum-type", "fixed");
+ gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
+ }
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
+ if (priv->thin_arbiter_count) {
+ gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
+ gf_proc_dump_write("ta_bad_child_index", "%d",
+ priv->ta_bad_child_index);
+ gf_proc_dump_write("ta_notify_dom_lock_offset", "%" PRId64,
+ priv->ta_notify_dom_lock_offset);
+ }
+
+ return 0;
+}
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+static int
+afr_find_child_index(xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int child_count = -1;
+ int i = -1;
+
+ priv = this->private;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ if ((xlator_t *)child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
+int
+__afr_get_up_children_count(afr_private_t *priv)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ int up_children = 0;
+ int i = 0;
- local = frame->local;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ return up_children;
+}
- local->op_errno = op_errno;
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
}
- UNLOCK (&frame->lock);
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
- call_count = afr_frame_return (frame);
+glusterfs_event_t
+__afr_transform_event_from_state(xlator_t *this)
+{
+ int i = 0;
+ int up_children = 0;
+ afr_private_t *priv = this->private;
+
+ if (__get_heard_from_all_status(this))
+ /* have_heard_from_all. Let afr_notify() do the propagation. */
+ return GF_EVENT_MAXVAL;
+
+ up_children = __afr_get_up_children_count(priv);
+ /* Treat the children with pending notification, as having sent a
+ * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN,
+ * as done in afr_notify() */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i])
+ continue;
+ priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN;
+ priv->child_up[i] = 0;
+ }
+
+ if (up_children)
+ /* We received at least one child up */
+ return GF_EVENT_CHILD_UP;
+ else
+ return GF_EVENT_CHILD_DOWN;
+
+ return GF_EVENT_MAXVAL;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+static void
+afr_notify_cbk(void *data)
+{
+ xlator_t *this = data;
+ afr_private_t *priv = this->private;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent.
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+ priv->timer = NULL;
+ event = __afr_transform_event_from_state(this);
+ if (event != GF_EVENT_MAXVAL)
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ if (propagate)
+ default_notify(this, event, NULL);
+}
- return 0;
+static void
+__afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
+{
+ struct timespec delay = {
+ 0,
+ };
+
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ priv->timer = gf_timer_call_after(this->ctx, delay, afr_notify_cbk, this);
+ if (priv->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer for delayed initialization");
+ }
}
+static int
+find_best_down_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t best_child = -1;
+ int64_t best_latency = INT64_MAX;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] < best_latency) {
+ best_child = i;
+ best_latency = priv->child_latency[i];
+ }
+ }
+ if (best_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found best down child (%d) @ %" PRId64 " ms latency",
+ best_child, best_latency);
+ }
+ return best_child;
+}
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+int
+find_worst_up_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t worst_child = -1;
+ int64_t worst_latency = INT64_MIN;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] > worst_latency) {
+ worst_child = i;
+ worst_latency = priv->child_latency[i];
+ }
+ }
+ if (worst_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found worst up child (%d) @ %" PRId64 " ms latency",
+ worst_child, worst_latency);
+ }
+ return worst_child;
+}
+
+void
+__afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
+ int64_t halo_max_latency_msec, int32_t *event,
+ int64_t child_latency_msec)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ int up_children = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ priv->child_latency[idx] = child_latency_msec;
+ gf_msg_debug(child_xlator->name, 0, "Client ping @ %" PRId64 " ms",
+ child_latency_msec);
+ if (priv->shd.iamshd)
+ return;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ up_children = __afr_get_up_children_count(priv);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (child_latency_msec > halo_max_latency_msec &&
+ priv->child_up[idx] == 1 && up_children > priv->halo_min_replicas) {
+ if ((up_children - 1) < priv->halo_min_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Overriding halo threshold, "
+ "min replicas: %d",
+ priv->halo_min_replicas);
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "exceeds halo threshold (%" PRId64
+ "), "
+ "marking child down.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ }
+ } else if (child_latency_msec < halo_max_latency_msec &&
+ priv->child_up[idx] == 0) {
+ if (up_children < priv->halo_max_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "below halo threshold (%" PRId64
+ "), "
+ "marking child up.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Not marking child %d up, "
+ "max replicas (%d) reached.",
+ idx, priv->halo_max_replicas);
}
+ }
+}
- call_count = local->call_count;
- frame->local = local;
+static int64_t
+afr_get_halo_latency(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+
+ priv = this->private;
+
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec = priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_msg_debug(this->name, 0, "Using halo latency %" PRId64,
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
+void
+__afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
+ const int idx, int64_t child_latency_msec,
+ int32_t *event, int32_t *call_psh,
+ int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
+ int worst_up_child = -1;
+ int64_t halo_max_latency_msec = afr_get_halo_latency(this);
+
+ priv = this->private;
+
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ *call_psh = 1;
+ *up_child = idx;
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * If this is an _actual_ CHILD_UP event, we
+ * want to set the child_latency to MAX to indicate
+ * the child needs ping data to be available before doing child-up
+ */
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
+ /*set to INT64_MAX-1 so that it is found for best_down_child*/
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
}
+ }
+
+ /*
+ * Handle the edge case where we exceed
+ * halo_min_replicas and we've got a child which is
+ * marked up as it was helping to satisfy the
+ * halo_min_replicas even though it's latency exceeds
+ * halo_max_latency_msec.
+ */
+ if (up_children > priv->halo_min_replicas) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child >= 0 &&
+ priv->child_latency[worst_up_child] > halo_max_latency_msec) {
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "doesn't meet halo threshold (%" PRId64
+ "), and > "
+ "halo_min_replicas (%d)",
+ worst_up_child, halo_max_latency_msec,
+ priv->halo_min_replicas);
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ }
+ }
- op_ret = 0;
+ if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child < 0) {
+ worst_up_child = idx;
+ }
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "up_children (%d) > halo_max_replicas (%d)",
+ worst_up_child, up_children, priv->halo_max_replicas);
+ }
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
+ if (up_children == 1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_UP;
+ }
+
+ priv->last_event[idx] = *event;
+}
+
+void
+__afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
+ int64_t child_latency_msec, int32_t *event,
+ int32_t *call_psh, int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int up_children = 0;
+ int down_children = 0;
+ int best_down_child = -1;
+
+ priv = this->private;
+
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+
+ /*
+ * If this is an _actual_ CHILD_DOWN event, we
+ * want to set the child_latency to < 0 to indicate
+ * the child is really disconnected.
+ */
+ if (child_latency_msec < 0) {
+ priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
+ }
+ priv->child_up[idx] = 0;
+
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * Handle the edge case where we need to find the
+ * next best child (to mark up) as marking this child
+ * down would cause us to fall below halo_min_replicas.
+ * We will also force the SHD to heal this child _now_
+ * as we want it to be up to date if we are going to
+ * begin using it synchronously.
+ */
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
+ best_down_child = find_best_down_child(this);
+ if (best_down_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Swapping out child %d for "
+ "child %d to satisfy halo_min_replicas (%d).",
+ idx, best_down_child, priv->halo_min_replicas);
+ priv->child_up[best_down_child] = 1;
+ *call_psh = 1;
+ *up_child = best_down_child;
}
- return 0;
+ }
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are down. Going "
+ "offline until at least one of them "
+ "comes back up.");
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ priv->last_event[idx] = *event;
}
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+void
+afr_ta_lock_release_synctask(xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ return;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_release_notify_lock_for_ta,
+ afr_ta_lock_release_done, ta_frame, this);
+ if (ret) {
+ STACK_DESTROY(ta_frame->root);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to release "
+ "AFR_TA_DOM_NOTIFY lock.");
+ }
+}
- if (op_ret == -1)
- local->op_errno = op_errno;
+static void
+afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
+{
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ afr_private_t *priv = this->private;
- }
- UNLOCK (&frame->lock);
+ lc = upcall->data;
- call_count = afr_frame_return (frame);
+ if (strcmp(lc->domain, AFR_TA_DOM_NOTIFY) != 0)
+ return;
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ if (priv->shd.iamshd) {
+ /* shd should ignore AFR_TA_DOM_NOTIFY release requests. */
+ return;
+ }
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
+ priv->release_ta_notify_dom_lock = _gf_true;
+ inmem_count = priv->ta_in_mem_txn_count;
+ onwire_count = priv->ta_on_wire_txn_count;
+ }
+ UNLOCK(&priv->lock);
+ if (inmem_count || onwire_count)
+ /* lock release will happen in txn code path after
+ * in-memory or on-wire txns are over.*/
+ return;
- return 0;
+ afr_ta_lock_release_synctask(this);
}
+static void
+afr_handle_upcall_event(xlator_t *this, struct gf_upcall *upcall)
+{
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ afr_private_t *priv = this->private;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ int i = 0;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_INODELK_CONTENTION:
+ afr_handle_inodelk_contention(this, upcall);
+ break;
+ case GF_UPCALL_CACHE_INVALIDATION:
+ up_ci = (struct gf_upcall_cache_invalidation *)upcall->data;
+
+ /* Since md-cache will be aggressively filtering
+ * lookups, the stale read issue will be more
+ * pronounced. Hence when a pending xattr is set notify
+ * all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time */
+ if (!up_ci->dict)
+ break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!dict_get(up_ci->dict, priv->pending_key[i]))
+ continue;
+ up_ci->flags |= UP_INVAL_ATTR;
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /*Internal processes may not have itable for
+ *top xlator*/
+ if (itable)
+ inode = inode_find(itable, upcall->gfid);
+ if (inode)
+ afr_inode_need_refresh_set(inode, this);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
- int ret = -1;
- int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t *child_xlator = NULL;
+ int i = -1;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t had_quorum = _gf_false;
+ gf_boolean_t has_quorum = _gf_false;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = -1;
+
+ child_xlator = (xlator_t *)data;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = afr_find_child_index(this, child_xlator);
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
+ "Received child_up from invalid subvolume");
+ goto out;
+ }
+
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
+
+ /* Calculates the child latency and sets event
+ */
+ LOCK(&priv->lock);
+ {
+ __afr_handle_ping_event(this, child_xlator, idx,
+ halo_max_latency_msec, &event,
+ child_latency_msec);
+ }
+ UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
+ }
+ }
- priv = this->private;
- child_count = priv->child_count;
+ if (event == GF_EVENT_CHILD_PING) {
+ /* This is the only xlator that handles PING, no reason to
+ * propagate.
+ */
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
+ }
+ UNLOCK(&priv->lock);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (!had_heard_from_all) {
+ ret = -1;
+ } else {
+ input = data;
+ output = data2;
+ ret = afr_xl_op(this, input, output);
}
+ goto out;
+ }
- frame->local = local;
- call_count = local->call_count;
+ if (event == GF_EVENT_UPCALL) {
+ afr_handle_upcall_event(this, data);
+ }
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc);
- if (!--call_count)
- break;
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ __afr_launch_notify_timer(this, priv);
+ propagate = 1;
+ break;
+ case GF_EVENT_CHILD_UP:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 1;
+ priv->ta_event_gen++;
+ break;
}
+ __afr_handle_child_up_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
+ break;
+ }
+ __afr_handle_child_down_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
+ break;
+
+ case GF_EVENT_CHILD_CONNECTING:
+ priv->last_event[idx] = event;
+
+ break;
+
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ priv->last_event[idx] = event;
+ break;
+ default:
+ propagate = 1;
+ break;
}
+ have_heard_from_all = __get_heard_from_all_status(this);
+ if (!had_heard_from_all && have_heard_from_all) {
+ if (priv->timer) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
+ if (priv->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
}
- return 0;
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->quorum_count) {
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
+ if (!had_quorum && has_quorum) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ if (had_quorum && !has_quorum) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ */
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
+ }
+ }
+out:
+ return ret;
}
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int __ret = -1;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ __ret = syncbarrier_init(&local->barrier);
+ if (__ret) {
+ if (op_errno)
+ *op_errno = __ret;
+ goto out;
+ }
+
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy(local->child_up, priv->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->event_generation = priv->event_generation;
+
+ local->read_attempted = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable2 = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable2) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->read_subvol = -1;
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->need_full_crawl = _gf_false;
+ if (priv->thin_arbiter_count) {
+ local->ta_child_up = priv->ta_child_up;
+ local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
+ }
+ local->is_new_entry = _gf_false;
+
+ INIT_LIST_HEAD(&local->healer);
+ return 0;
+out:
+ return -1;
+}
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+int
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
{
- afr_local_t * local = NULL;
- int call_count = -1;
+ int ret = -ENOMEM;
- local = frame->local;
- call_count = afr_frame_return (frame);
+ lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
- return 0;
+ ret = 0;
+out:
+ return ret;
}
+void
+afr_matrix_cleanup(int32_t **matrix, unsigned int m)
+{
+ int i = 0;
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE(matrix[i]);
+ }
+
+ GF_FREE(matrix);
+out:
+ return;
+}
+
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int i = 0;
- int call_count = 0;
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC(sizeof(*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC(sizeof(*matrix[i]), n, gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup(matrix, m);
+ return NULL;
+}
- local = frame->local;
- priv = this->private;
+int
+afr_transaction_local_init(afr_local_t *local, xlator_t *this)
+{
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
+ ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOMEM;
+ local->pre_op_compat = priv->pre_op_compat;
+
+ local->transaction.pre_op = GF_CALLOC(sizeof(*local->transaction.pre_op),
+ priv->child_count, gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->transaction.changelog_xdata = GF_CALLOC(
+ sizeof(*local->transaction.changelog_xdata), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!local->transaction.changelog_xdata)
+ goto out;
+
+ if (priv->arbiter_count == 1) {
+ local->transaction.pre_op_sources = GF_CALLOC(
+ sizeof(*local->transaction.pre_op_sources), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op_sources)
+ goto out;
+ }
+
+ local->transaction.failed_subvols = GF_CALLOC(
+ sizeof(*local->transaction.failed_subvols), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_set_low_priority(call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+void
+afr_priv_destroy(afr_private_t *priv)
+{
+ int i = 0;
+ int child_count = -1;
+
+ if (!priv)
+ goto out;
+
+ GF_FREE(priv->sh_domain);
+ GF_FREE(priv->last_event);
+
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+ if (priv->pending_key) {
+ for (i = 0; i < child_count; i++)
+ GF_FREE(priv->pending_key[i]);
+ }
+
+ GF_FREE(priv->pending_reads);
+ GF_FREE(priv->local);
+ GF_FREE(priv->pending_key);
+ GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
+ GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
+ GF_FREE(priv->child_latency);
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
+out:
+ return;
+}
+
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat)
+{
+ int i = 0;
+ int **changelog = NULL;
+ int idx = -1;
+ int m_idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ idx = afr_index_from_ia_type(iat);
+
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ continue;
+
+ changelog[i][m_idx] = hton32(1);
+ if (idx != -1)
+ changelog[i][idx] = hton32(1);
+ /* If the newentry marking is on a newly created directory,
+ * then mark it with the full-heal indicator.
+ */
+ if ((IA_ISDIR(iat)) && (priv->esh_granular))
+ changelog[i][d_idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
+out:
+ return changelog;
+}
+
+static dict_t *
+afr_set_heal_info(char *status)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
+out:
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
+}
+
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
+{
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
+
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
+
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
- return 0;
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
+{
+ int ret = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
}
+ }
- local->call_count = call_count;
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
+ }
+ }
+ return ret;
+}
- local->cont.lk.user_flock.l_type = F_UNLCK;
+/*return EIO, EAGAIN or pending*/
+int
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
+{
+ int ret = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
+ &esh, replies);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
+ }
+ }
+ if (msh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (dsh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (esh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.user_flock);
-
- if (!--call_count)
- break;
- }
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
+out:
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+ *metadata_selfheal = msh;
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return ret;
+}
+
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ unsigned char pending = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
+
+ if (ret == -ENOMEM) {
+ ret = -1;
+ goto out;
+ }
+
+ if (pending & PFLAG_PENDING) {
+ gf_asprintf(&substr, "-pending");
+ if (!substr)
+ goto out;
+ }
+
+ if (ret == -EIO) {
+ ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret == -EAGAIN) {
+ ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret >= 0) {
+ /* value of ret = source index
+ * so ret >= 0 and at least one of the 3 booleans set to
+ * true means a source is identified; heal is required.
+ */
+ if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else if (ret < 0) {
+ /* Apart from above checked -ve ret values, there are
+ * other possible ret values like ENOTCONN
+ * (returned when number of valid replies received are
+ * less than 2)
+ * in which case heal is required when one of the
+ * selfheal booleans is set.
+ */
+ if (data_selfheal || entry_selfheal || metadata_selfheal) {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
+ }
- return 0;
+ ret = 0;
+ op_errno = 0;
+
+out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ GF_FREE(substr);
+ return ret;
}
+int
+_afr_is_split_brain(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies, afr_transaction_type type,
+ gf_boolean_t *spb)
+{
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ witness = alloca0(priv->child_count * sizeof(*witness));
+
+ ret = afr_selfheal_find_direction(frame, this, replies, type,
+ priv->child_up, sources, sinks, witness,
+ NULL);
+ if (ret)
+ return ret;
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ sources_count = AFR_COUNT(sources, priv->child_count);
+ if (!sources_count)
+ *spb = _gf_true;
+
+ return ret;
+}
+
+int
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = -1;
-/* int ret = 0; */
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
- child_index = (long) cookie;
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_DATA_TRANSACTION,
+ d_spb);
+ if (ret)
+ goto out;
- afr_lk_unlock (frame, this);
- return 0;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_METADATA_TRANSACTION,
+ m_spb);
+out:
+ if (replies) {
+ afr_replies_wipe(replies, priv->child_count);
+ replies = NULL;
+ }
+ return ret;
+}
+
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ GF_FREE(opaque);
+ return 0;
+}
+
+int
+afr_get_split_brain_status(void *opaque)
+{
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ int ret = -1;
+ int op_errno = 0;
+ int i = 0;
+ char *choices = NULL;
+ char *status = NULL;
+ dict_t *dict = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ afr_spb_status_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ this = frame->this;
+ loc = data->loc;
+ priv = this->private;
+ children = priv->children;
+
+ inode = afr_inode_find(this, loc->gfid);
+ if (!inode)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Calculation for string length :
+ * (child_count X length of child-name) + SLEN(" Choices :")
+ * child-name consists of :
+ * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
+ * b) strlen("-client-00,") assuming 16 replicas
+ */
+ choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
+ SLEN(" Choices:"));
+
+ ret = afr_is_split_brain(frame, this, inode, loc->gfid, &d_spb, &m_spb);
+ if (ret) {
+ op_errno = -ret;
+ if (ret == -EAGAIN) {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ AFR_MSG_DICT_SET_FAILED,
+ "Failed to set GF_AFR_SBRAIN_STATUS in dict");
+ }
}
+ ret = -1;
+ goto out;
+ }
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.ret_flock = *lock;
+ if (d_spb || m_spb) {
+ sprintf(choices, " Choices:");
+ for (i = 0; i < priv->child_count; i++) {
+ strcat(choices, children[i]->name);
+ strcat(choices, ",");
}
+ choices[strlen(choices) - 1] = '\0';
- child_index++;
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s%s",
+ (d_spb) ? "yes" : "no", (m_spb) ? "yes" : "no",
+ choices);
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ }
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
- } else {
- /* locking has succeeded on all nodes that are up */
+ ret = 0;
+out:
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ return ret;
+}
+
+int32_t
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ int op_errno = 0;
+ dict_t *dict = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+
+ local = frame->local;
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
+
+ if (ret == 1 || ret == 2) {
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set sh-fail-msg in dict");
+ ret = 0;
+ goto out;
+ } else {
+ if (local->xdata_rsp) {
+ /* 'sh-fail-msg' has been set in the dict during self-heal.*/
+ dict_copy(local->xdata_rsp, dict);
+ ret = 0;
+ } else if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ }
+ }
+
+out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ if (local->op == GF_FOP_GETXATTR)
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ else if (local->op == GF_FOP_SETXATTR)
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
+int
+afr_get_child_index_from_name(xlator_t *this, char *name)
+{
+ afr_private_t *priv = this->private;
+ int index = -1;
+
+ for (index = 0; index < priv->child_count; index++) {
+ if (!strcmp(priv->children[index]->name, name))
+ goto out;
+ }
+ index = -1;
+out:
+ return index;
+}
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
+void
+afr_priv_need_heal_set(afr_private_t *priv, gf_boolean_t need_heal)
+{
+ LOCK(&priv->lock);
+ {
+ priv->need_heal = need_heal;
+ }
+ UNLOCK(&priv->lock);
+}
- */
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+void
+afr_set_need_heal(xlator_t *this, afr_local_t *local)
+{
+ int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].need_heal) {
+ need_heal = _gf_true;
+ break;
}
+ }
+ afr_priv_need_heal_set(priv, need_heal);
+ return;
+}
- return 0;
+gf_boolean_t
+afr_get_need_heal(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_true;
+
+ LOCK(&priv->lock);
+ {
+ need_heal = priv->need_heal;
+ }
+ UNLOCK(&priv->lock);
+ return need_heal;
}
+int
+afr_get_msg_id(char *op_type)
+{
+ if (!strcmp(op_type, GF_AFR_REPLACE_BRICK))
+ return AFR_MSG_REPLACE_BRICK_STATUS;
+ else if (!strcmp(op_type, GF_AFR_ADD_BRICK))
+ return AFR_MSG_ADD_BRICK_STATUS;
+ return -1;
+}
int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *heal_frame,
+ void *opaque)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
+
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh(txn_frame, this, local->inode, NULL, local->refreshfn);
- priv = this->private;
+ AFR_STACK_DESTROY(heal_frame);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
+ return 0;
+}
- frame->local = local;
+int
+afr_fav_child_reset_sink_xattrs(void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *)opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ {
+ if (ret < priv->child_count)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ data_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ }
+
+ if (m_spb) {
+ memset(locked_on, 0, sizeof(*locked_on) * priv->child_count);
+ memset(undid_pending, 0, sizeof(*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+ if (ret < priv->child_count)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ mdata_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ }
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
+ return ret;
+}
- if (!local->cont.lk.locked_nodes) {
- op_errno = ENOMEM;
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter.
+ */
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *xattr = NULL;
+ int i = 0;
+ int len = 0;
+ int keylen = 0;
+ size_t str_len = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ keylen = strlen(local->cont.getxattr.name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret) {
+ str_len = strlen(default_str);
+ buf = strncat(buf, default_str, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ } else {
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
+ if (ret) {
+ gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Failed to get the node_uuid of brick "
+ "%d",
+ i);
goto out;
+ }
+ str_len = strlen(xattr);
+ buf = strncat(buf, xattr, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
}
+ }
+ buf[--len] = '\0'; /*remove the last delimiter*/
+ if (serz_len)
+ *serz_len = ++len;
+ ret = 0;
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.user_flock = *flock;
- local->cont.lk.ret_flock = *flock;
+out:
+ return ret;
+}
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock);
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ uint64_t write_subvol = 0;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ write_subvol = local->inode_ctx->write_subvol;
+ UNLOCK(&local->inode->lock);
+
+ return write_subvol;
+}
+
+int
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int event = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ data_accused = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ event = local->event_generation;
+
+ afr_readables_fill(frame, this, local->inode, data_accused,
+ metadata_accused, data_readable, metadata_readable,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_readable[i])
+ datamap |= (1 << i);
+ if (metadata_readable[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ LOCK(&local->inode->lock);
+ {
+ if (local->inode_ctx->write_subvol == 0 &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ local->inode_ctx->write_subvol = val;
}
- return 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
}
int
-afr_priv_dump (xlator_t *this)
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
+ afr_local_t *local = NULL;
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ GF_ASSERT(local->inode_ctx->lock_count > 0);
+ local->inode_ctx->lock_count--;
- GF_ASSERT (this);
- priv = this->private;
+ if (!local->inode_ctx->lock_count)
+ local->inode_ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
- GF_ASSERT (priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
- }
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%u", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
+ return 0;
+}
- return 0;
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode)
+{
+ int ret = 0;
+
+ local->inode = inode_ref(inode);
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, local->inode, &local->inode_ctx);
+ }
+ UNLOCK(&local->inode->lock);
+ if (ret < 0) {
+ gf_msg_callingfn(
+ this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_INODE_CTX_GET_FAILED,
+ "Error getting inode ctx %s", uuid_utoa(local->inode->gfid));
+ }
+ return ret;
}
+gf_boolean_t
+afr_ta_is_fop_called_from_synctask(xlator_t *this)
+{
+ struct synctask *task = NULL;
+ gf_lkowner_t tmp_owner = {
+ 0,
+ };
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
+ task = synctask_get();
+ if (!task)
+ return _gf_false;
-static int
-find_child_index (xlator_t *this, xlator_t *child)
-{
- afr_private_t *priv = NULL;
- int i = -1;
+ set_lk_owner_from_ptr(&tmp_owner, (void *)this);
- priv = this->private;
+ if (!is_same_lkowner(&tmp_owner, &task->frame->root->lk_owner))
+ return _gf_false;
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
+ return _gf_true;
+}
+
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+ gf_boolean_t locked = _gf_false;
+ struct gf_flock flock1 = {
+ 0,
+ };
+ struct gf_flock flock2 = {
+ 0,
+ };
+ int32_t cmd = 0;
+
+ /* Clients must take AFR_TA_DOM_NOTIFY lock only when the previous lock
+ * has been released in afr_notify due to upcall notification from shd.
+ */
+ GF_ASSERT(priv->ta_notify_dom_lock_offset == 0);
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock1.l_type = F_WRLCK;
+
+ while (!locked) {
+ if (priv->shd.iamshd) {
+ cmd = F_SETLKW;
+ flock1.l_start = 0;
+ flock1.l_len = 0;
+ } else {
+ cmd = F_SETLK;
+ gf_uuid_generate(gfid);
+ flock1.l_start = gfid_to_ino(gfid);
+ if (flock1.l_start < 0)
+ flock1.l_start = -flock1.l_start;
+ flock1.l_len = 1;
+ }
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, NULL, NULL);
+ if (!ret) {
+ locked = _gf_true;
+ priv->ta_notify_dom_lock_offset = flock1.l_start;
+ } else if (ret == -EAGAIN) {
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get "
+ "AFR_TA_DOM_NOTIFY lock on %s.",
+ loc->name);
+ goto out;
}
+ }
+
+ flock2.l_type = F_WRLCK;
+ flock2.l_start = 0;
+ flock2.l_len = 0;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get AFR_TA_DOM_MODIFY lock on %s.", loc->name);
+ flock1.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, NULL,
+ NULL);
+ }
+out:
+ return ret;
+}
- return i;
+int
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = 0;
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock.l_type = F_UNLCK;
+ flock.l_start = 0;
+ flock.l_len = 0;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+ goto out;
+ }
+
+ if (!priv->shd.iamshd)
+ /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock
+ * in post-op as they use it as a notification mechanism. When
+ * shd sends a lock request on TA during heal, the clients will
+ * receive a lock-contention upcall notification upon which they
+ * will release the AFR_TA_DOM_NOTIFY lock after completing the
+ * in flight I/O.*/
+ goto out;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+out:
+ return ret;
}
-int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+call_frame_t *
+afr_ta_frame_create(xlator_t *this)
{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
- int i = -1;
- int up_children = 0;
- int down_children = 0;
+ call_frame_t *frame = NULL;
+ void *lk_owner = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+ lk_owner = (void *)this;
+ afr_set_lk_owner(frame, this, lk_owner);
+ return frame;
+}
- priv = this->private;
+gf_boolean_t
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
+{
+ int data_count = 0;
- if (!priv)
- return 0;
+ data_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (data_count == 2) {
+ return _gf_true;
+ } else if (data_count == 1 && local->ta_child_up) {
+ return _gf_true;
+ }
- child_up = priv->child_up;
+ return _gf_false;
+}
- switch (event) {
- case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
- /* temporarily
- afr_attempt_lock_recovery (this, i);
- */
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
+ return _gf_false;
- child_up[i] = 1;
+ local = frame->local;
- LOCK (&priv->lock);
- {
- priv->up_count++;
- }
- UNLOCK (&priv->lock);
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO:If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accomodate for DHT layout setting */
+ return _gf_false;
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i] == 1)
- up_children++;
-
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
-
- default_notify (this, event, data);
- } else {
- default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
- }
+ if (local->inode == NULL)
+ return _gf_false;
- break;
+ if (!__is_root_gfid(local->inode->gfid))
+ return _gf_false;
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
+ return _gf_true;
+}
- child_up[i] = 0;
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
+{
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
- LOCK (&priv->lock);
- {
- priv->down_count++;
- }
- UNLOCK (&priv->lock);
+ return _gf_false;
+}
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i] == 0)
- down_children++;
-
- if (down_children == priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
-
- default_notify (this, event, data);
- } else {
- default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
- }
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
- break;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
- default:
- default_notify (this, event, data);
- }
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
+}
- return 0;
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
+{
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
+ }
+ return _gf_false;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 59bd7872e9f..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,753 +1,346 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
-
- uint32_t cksum;
-
- cksum = checksum[0];
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
+#include "afr-transaction.h"
int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = 0;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- uint32_t entry_cksum;
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+ child_index = (long)cookie;
- int call_count = 0;
- off_t last_offset = 0;
- char sh_type_str[256] = {0,};
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ LOCK(&frame->lock);
+ {
if (op_ret == -1) {
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (op_ret == 0)
- goto out;
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
+ }
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- }
-
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
-
-out:
- if ((op_ret == 0) || (op_ret == -1)) {
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->need_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_NORMAL,
- "%s self-heal triggered. path: %s, "
- "reason: checksums of directory differ,"
- " forced merge option set",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this);
- } else {
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
- }
-
- return 0;
+ return 0;
}
-
int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = -1;
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int i;
- int call_count = 0;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
+ local->op = GF_FOP_OPENDIR;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
- local->call_count = call_count;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- if (!--call_count)
- break;
- }
- }
+ loc_copy(&local->loc, loc);
- return 0;
-}
-
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
-
- int call_count = -1;
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
- priv = this->private;
- local = frame->local;
+ call_count = local->call_count;
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_opendir_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, loc, fd, NULL);
- LOCK (&frame->lock);
- {
- if (op_ret >= 0)
- local->op_ret = op_ret;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->op_ret == 0) {
-
- ret = afr_fd_ctx_set (this, local->fd);
-
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -1;
- gf_log (this->name, GF_LOG_ERROR, " failed to "
- "set fd ctx for fd %p", local->fd);
- goto out;
- }
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- } else {
- out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ if (!--call_count)
+ break;
}
+ }
- return 0;
-}
-
-
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int child_count = 0;
- int i = 0;
-
- int ret = -1;
- int call_count = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
- local->fd = fd_ref (fd);
-
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
-
- return 0;
+ AFR_STACK_UNWIND(opendir, frame, -1, op_errno, fd, NULL);
+ return 0;
}
-
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
-
-struct entry_name {
- char *name;
- struct list_head list;
-};
-
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static int
+afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
{
- struct entry_name *e;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ &gen);
+
+ if (gen != priv->event_generation || !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+
+ if (!priv->consistent_metadata)
+ return 0;
-out:
- return ret;
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get(inode, this, NULL, NULL, NULL,
+ NULL);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
+
+ return 0;
}
-
static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
- struct entry_name *n = NULL;
- gf_dirent_t * entry = NULL;
-
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return;
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t validate_subvol = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ need_heal = afr_get_need_heal(this);
+ validate_subvol = need_heal | priv->consistent_metadata;
+
+ list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
+ {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
+ continue;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &entries->list);
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
+ if (!validate_subvol)
+ continue;
- list_add (&n->list, &fd_ctx->entries);
+ if (entry->inode) {
+ ret = afr_validate_read_subvol(entry->inode, this, subvol);
+ if (ret == -1) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ continue;
+ }
}
+ }
}
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
+int32_t
+afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- gf_dirent_t *entry, *tmp;
- int ret = 0;
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ INIT_LIST_HEAD(&entries.list);
- off_t offset = 0;
+ local = frame->local;
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- return offset;
-}
-
-
-static void
-afr_forget_entries (fd_t *fd)
-{
- struct entry_name *entry, *tmp;
- int ret = 0;
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ if (op_ret >= 0)
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
+ AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ gf_dirent_free(&entries);
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ return 0;
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+int
+afr_readdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = -1;
-
- priv = this->private;
- local = frame->local;
- child_index = (long) cookie;
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ local->op_errno = EINVAL;
+ local->op_ret = -1;
+ }
+
+ if (subvol == -1 || !fd_ctx) {
+ AFR_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, 0, 0);
return 0;
+ }
+
+ fd_ctx->readdir_subvol = subvol;
+
+ if (local->op == GF_FOP_READDIR)
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ else
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+int
+afr_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- ino_t inum = 0;
-
- int call_child = 0;
- int ret = 0;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = -1;
-
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- off_t offset = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
- goto out;
- }
-
- call_child = ++child_index;
-
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
- entry->d_ino = inum;
- inum = afr_itransform (entry->d_stat.ia_ino,
- priv->child_count, child_index);
- entry->d_stat.ia_ino = inum;
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries from offset %"PRId64", child %s",
- offset, children[child_index]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
-
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ local->op = whichop;
+ local->fd = fd_ref(fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict) ? dict_ref(dict) : NULL;
+
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn(frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_readdir_wind(frame, this, subvol);
+ }
+
+ return 0;
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
- return 0;
+ AFR_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
-
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
-
- fd_ctx->last_tried = call_child;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset);
- else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
- return 0;
+ return 0;
}
-
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
- return 0;
-}
-
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
-int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
- return 0;
+ return 0;
}
-
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd)
+afr_releasedir(xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
- afr_cleanup_fd_ctx (this, fd);
+ afr_cleanup_fd_ctx(this, fd);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 40c7b6aef28..773e925ec6c 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,50 +1,33 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
#define __DIR_READ_H__
-
int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd);
+afr_releasedir(xlator_t *this, fd_t *fd);
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
-
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata);
int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict);
int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
-
-int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
-
+afr_checksum(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 06559ede08e..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,1313 +1,937 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
-
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
-{
- char *tmp = NULL;
-
- if (!child->parent) {
- loc_copy (parent, child);
- return;
- }
-
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
-
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
-
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
-}
-
-/* {{{ create */
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this);
int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- unwind_buf->ia_ino = local->cont.create.ino;
-
- local->cont.create.preparent.ia_ino = local->cont.create.parent_ino;
- local->cont.create.postparent.ia_ino = local->cont.create.parent_ino;
-
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
- }
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup(child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->path = gf_strdup(dirname(child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->inode = inode_ref(child->parent);
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
- return 0;
+ return ret;
}
-
-int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+static void
+__afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- fd_ctx->flags = local->cont.create.flags;
-
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
-
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
-
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ }
+
+ if (local->inode) {
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
+ inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
+ NULL, &args);
+ }
+
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get(local->parent, this, NULL,
+ local->readable, NULL, NULL);
+
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get(local->parent2, this, NULL,
+ local->readable2, NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ if (local->replies[i].xdata)
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ }
+ }
}
+static void
+__afr_dir_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed(frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
+}
-int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+static int
+__afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ LOCK(&frame->lock);
+ {
+ __afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
+ preparent, postparent, preparent2, postparent2,
+ xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (call_count == 0) {
+ __afr_dir_write_finalize(frame, this);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd,
- local->cont.create.params);
- if (!--call_count)
- break;
- }
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
- return 0;
-}
+ afr_mark_entry_pending_changelog(frame, this);
+ afr_transaction_resume(frame, this);
+ }
+
+ return 0;
+}
int
-afr_create_done (call_frame_t *frame, xlator_t *this)
+afr_mark_new_entry_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
+ int call_count = 0;
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return(frame);
- AFR_STACK_DESTROY (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
-
-int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ new_local = AFR_FRAME_INIT(new_frame, op_errno);
+ if (!new_local)
+ goto out;
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count++;
+ continue;
+ }
+ pending[i] = 1;
+ }
+
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ if (!changelog)
+ goto out;
+
+ new_local->pending = changelog;
+ gf_uuid_copy(new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref(local->inode);
+
+ new_local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
+
+ STACK_WIND_COOKIE(new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->xattrop, &new_local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ if (!--call_count)
+ break;
+ }
+
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY(new_frame);
+ if (xattr)
+ dict_unref(xattr);
+ return;
+}
- loc_copy (&local->loc, loc);
+void
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
+ unsigned char *success_replies = NULL;
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local = frame->local;
+ priv = this->private;
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
- if (params)
- local->cont.create.params = dict_ref (params);
+ if (local->op_ret < 0)
+ return;
- if (loc->parent)
- local->cont.create.parent_ino = loc->parent->ino;
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD &&
+ local->op != GF_FOP_MKDIR)
+ return;
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
+ pre_op_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ /* FOP succeeded on all bricks. */
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ afr_mark_new_entry_changelog(frame, this);
- return 0;
+ return;
}
-/* }}} */
-
-/* {{{ mknod */
+/* {{{ create */
int
-afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+afr_create_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->ia_ino = local->cont.mknod.ino;
+ local = frame->local;
- local->cont.mknod.preparent.ia_ino = local->cont.mknod.parent_ino;
- local->cont.mknod.postparent.ia_ino = local->cont.mknod.parent_ino;
-
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+afr_create_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->cont.mknod.params);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_create_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_create_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create, &local->loc,
+ local->cont.create.flags, local->cont.create.mode,
+ local->umask, local->cont.create.fd, local->xdata_req);
+ return 0;
}
-
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
- if (params)
- local->cont.mknod.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mknod.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+
+ local->fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->fd_ctx->flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref(fd);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
-/* {{{ mkdir */
-
+/* {{{ mknod */
int
-afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_mknod_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- unwind_buf->ia_ino = local->cont.mkdir.ino;
-
- local->cont.mkdir.preparent.ia_ino = local->cont.mkdir.parent_ino;
- local->cont.mkdir.postparent.ia_ino = local->cont.mkdir.parent_ino;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
-
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_mknod_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod, &local->loc,
+ local->cont.mknod.mode, local->cont.mknod.dev,
+ local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mkdir.mode = mode;
- if (params)
- local->cont.mkdir.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mkdir.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ AFR_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-/* {{{ link */
-
+/* {{{ mkdir */
int
-afr_link_unwind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- unwind_buf->ia_ino = local->cont.link.ino;
-
- local->cont.link.preparent.ia_ino = local->cont.link.parent_ino;
- local->cont.link.postparent.ia_ino = local->cont.link.parent_ino;
-
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_mkdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+int
+afr_mkdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_mkdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, xdata);
+ goto out;
+ }
+
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ AFR_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+/* }}} */
- local->call_count = call_count;
+/* {{{ link */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
+int
+afr_link_unwind(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = frame->local;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- local->transaction.unwind (frame, this);
+int
+afr_link_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_link_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(newloc->parent);
- local->cont.link.ino = oldloc->inode->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- if (oldloc->parent)
- local->cont.link.parent_ino = newloc->parent->ino;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
+ local->op = GF_FOP_LINK;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ local->transaction.wind = afr_link_wind;
+ local->transaction.unwind = afr_link_unwind;
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ symlink */
-
int
-afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_symlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->ia_ino = local->cont.symlink.ino;
-
- local->cont.symlink.preparent.ia_ino = local->cont.symlink.parent_ino;
- local->cont.symlink.postparent.ia_ino = local->cont.symlink.parent_ino;
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.symlink.buf = *buf;
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->cont.symlink.params);
-
- if (!--call_count)
- break;
-
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_symlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc, local->umask,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.symlink.linkpath = gf_strdup (linkpath);
- if (params)
- local->cont.symlink.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.symlink.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.symlink.linkpath = gf_strdup(linkpath);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1315,223 +939,118 @@ out:
/* {{{ rename */
int
-afr_rename_unwind (call_frame_t *frame, xlator_t *this)
+afr_rename_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- unwind_buf->ia_ino = local->cont.rename.ino;
-
- local->cont.rename.preoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.postoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.prenewparent.ia_ino = local->cont.rename.newparent_ino;
- local->cont.rename.postnewparent.ia_ino = local->cont.rename.newparent_ino;
-
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
- }
-
- return 0;
-}
-
-
-int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ return 0;
+}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-
int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
+afr_rename_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rename_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->read_child_index = afr_read_child (this, oldloc->inode);
-
- local->cont.rename.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.rename.oldparent_ino = oldloc->parent->ino;
- if (newloc->parent)
- local->cont.rename.newparent_ino = newloc->parent->ino;
-
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
+
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(oldloc->parent);
+ local->parent2 = inode_ref(newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc(&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this,
+ AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1539,399 +1058,205 @@ out:
/* {{{ unlink */
int
-afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_unlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (main_frame) {
- local->cont.unlink.preparent.ia_ino = local->cont.unlink.parent_ino;
- local->cont.unlink.postparent.ia_ino = local->cont.unlink.parent_ino;
-
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+afr_unlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_unlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink, &local->loc,
+ local->xflag, local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+int
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- if (loc->parent)
- local->cont.unlink.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->xflag = xflag;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ rmdir */
-
-
int
-afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.rmdir.preparent.ia_ino = local->cont.rmdir.parent_ino;
- local->cont.rmdir.postparent.ia_ino = local->cont.rmdir.parent_ino;
-
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rmdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir, &local->loc,
+ local->cont.rmdir.flags, local->xdata_req);
+ return 0;
}
-
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->cont.rmdir.flags = flags;
- loc_copy (&local->loc, loc);
-
- if (loc->parent)
- local->cont.rmdir.parent_ino = loc->parent->ino;
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.rmdir.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index e589efa3794..1d88c3b9b26 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,60 +1,46 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
#define __DIR_WRITE_H__
int32_t
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata);
int32_t
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int32_t
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int32_t
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 11db1e1d733..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
#include <fnmatch.h>
@@ -25,931 +15,1880 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-
+#include <glusterfs/glusterfs.h>
#include "afr.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>