summaryrefslogtreecommitdiffstats
path: root/xlators/bindings/python/src/glusterstack.py
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2011-03-31 03:31:00 +0000
committerVijay Bellur <vijay@dev.gluster.com>2011-03-31 04:20:09 -0700
commit43700ed04c1bac15f5df3702410c83fb63607476 (patch)
tree7cef18d734c1784b62f90e0f14e2ca7bce2e3955 /xlators/bindings/python/src/glusterstack.py
parent215a8352e2219c7ead29693ccab3452054a89041 (diff)
cli: Improve profile output messages
Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Signed-off-by: Vijay Bellur <vijay@dev.gluster.com> BUG: 2616 () URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2616
Diffstat (limited to 'xlators/bindings/python/src/glusterstack.py')
0 files changed, 0 insertions, 0 deletions
/table> -rw-r--r--xlators/bindings/python/src/testxlator.py56
-rw-r--r--xlators/cluster/afr/src/Makefile.am31
-rw-r--r--xlators/cluster/afr/src/afr-common.c4347
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c831
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h31
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2175
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h43
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c1982
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h39
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2286
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h68
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1715
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h33
-rw-r--r--xlators/cluster/afr/src/afr-open.c445
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c239
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c743
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h42
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2805
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h132
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1753
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c2583
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c839
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c457
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h181
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1521
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h94
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1869
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h54
-rw-r--r--xlators/cluster/afr/src/afr.c401
-rw-r--r--xlators/cluster/afr/src/afr.h1025
-rw-r--r--xlators/cluster/afr/src/pump.c1142
-rw-r--r--xlators/cluster/afr/src/pump.h34
-rw-r--r--xlators/cluster/dht/src/Makefile.am13
-rw-r--r--xlators/cluster/dht/src/dht-common.c2211
-rw-r--r--xlators/cluster/dht/src/dht-common.h364
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c523
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c96
-rw-r--r--xlators/cluster/dht/src/dht-helper.c546
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c297
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c549
-rw-r--r--xlators/cluster/dht/src/dht-layout.c278
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c182
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h23
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1519
-rw-r--r--xlators/cluster/dht/src/dht-rename.c283
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c590
-rw-r--r--xlators/cluster/dht/src/dht-shared.c781
-rw-r--r--xlators/cluster/dht/src/dht.c484
-rw-r--r--xlators/cluster/dht/src/nufa.c395
-rw-r--r--xlators/cluster/dht/src/switch.c312
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c63
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c124
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am11
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c677
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h28
-rw-r--r--xlators/cluster/stripe/src/stripe.c3229
-rw-r--r--xlators/cluster/stripe/src/stripe.h164
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c996
-rw-r--r--xlators/debug/error-gen/src/error-gen.h37
-rw-r--r--xlators/debug/io-stats/src/Makefile.am9
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c1045
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3079
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h44
-rw-r--r--xlators/encryption/crypt/src/crypt.c4522
-rw-r--r--xlators/encryption/crypt/src/crypt.h903
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c71
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c618
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c516
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c197
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h48
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c696
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h405
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1568
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c361
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1287
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h134
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py775
-rw-r--r--xlators/features/glupy/examples/helloworld.py19
-rw-r--r--xlators/features/glupy/examples/negative.py91
-rw-r--r--xlators/features/glupy/src/Makefile.am21
-rw-r--r--xlators/features/glupy/src/glupy.c2471
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/glupy.py841
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1275
-rw-r--r--xlators/features/index/src/index.h59
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c423
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c223
-rw-r--r--xlators/features/locks/src/common.h70
-rw-r--r--xlators/features/locks/src/entrylk.c588
-rw-r--r--xlators/features/locks/src/inodelk.c487
-rw-r--r--xlators/features/locks/src/locks-mem-types.h21
-rw-r--r--xlators/features/locks/src/locks.h101
-rw-r--r--xlators/features/locks/src/posix.c1279
-rw-r--r--xlators/features/locks/src/reservelk.c51
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c52
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h24
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c100
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h27
-rw-r--r--xlators/features/marker/src/marker-quota.c546
-rw-r--r--xlators/features/marker/src/marker-quota.h73
-rw-r--r--xlators/features/marker/src/marker.c1238
-rw-r--r--xlators/features/marker/src/marker.h50
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c346
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c124
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h26
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__codecheck.py46
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py19
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py367
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py518
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py102
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py226
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py838
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py249
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c29
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c217
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c389
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c667
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c661
-rw-r--r--xlators/features/quiesce/src/quiesce.h23
-rw-r--r--xlators/features/quota/src/Makefile.am22
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c403
-rw-r--r--xlators/features/quota/src/quota-mem-types.h26
-rw-r--r--xlators/features/quota/src/quota.c3443
-rw-r--r--xlators/features/quota/src/quota.h183
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c423
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h37
-rw-r--r--xlators/features/quota/src/quotad-helpers.c113
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c210
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c133
-rw-r--r--xlators/features/read-only/src/read-only-common.h76
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c36
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h23
-rw-r--r--xlators/features/trash/src/trash.c111
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c536
-rw-r--r--xlators/lib/src/libxlator.h106
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1741
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c3705
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c3018
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c957
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c559
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c213
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c668
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h30
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c127
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c4155
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h129
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c140
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c1530
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1192
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c1506
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1823
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c170
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h61
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2054
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h92
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1690
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c7100
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h402
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1788
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h110
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c1792
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1491
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c794
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h507
-rw-r--r--xlators/mount/fuse/src/Makefile.am22
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c3389
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h338
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c460
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h23
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c928
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in683
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in3
-rw-r--r--xlators/nfs/server/src/Makefile.am23
-rw-r--r--xlators/nfs/server/src/acl3.c843
-rw-r--r--xlators/nfs/server/src/acl3.h37
-rw-r--r--xlators/nfs/server/src/mount3.c1201
-rw-r--r--xlators/nfs/server/src/mount3.h41
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c230
-rw-r--r--xlators/nfs/server/src/nfs-common.h27
-rw-r--r--xlators/nfs/server/src/nfs-fops.c500
-rw-r--r--xlators/nfs/server/src/nfs-fops.h41
-rw-r--r--xlators/nfs/server/src/nfs-generics.c43
-rw-r--r--xlators/nfs/server/src/nfs-generics.h32
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c73
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h19
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h30
-rw-r--r--xlators/nfs/server/src/nfs.c1062
-rw-r--r--xlators/nfs/server/src/nfs.h52
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c183
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h55
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c2744
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h42
-rw-r--r--xlators/nfs/server/src/nfs3.c1037
-rw-r--r--xlators/nfs/server/src/nfs3.h130
-rw-r--r--xlators/nfs/server/src/nlm4.c2527
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c515
-rw-r--r--xlators/performance/io-cache/src/io-cache.h30
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c22
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h20
-rw-r--r--xlators/performance/io-cache/src/page.c91
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2060
-rw-r--r--xlators/performance/io-threads/src/io-threads.h33
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h21
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1020
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h22
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3561
-rw-r--r--xlators/performance/quick-read/src/quick-read.h61
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c73
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h20
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c491
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h23
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4295
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c56
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3744
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c37
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c22
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c25
-rw-r--r--xlators/protocol/client/src/client-handshake.c1017
-rw-r--r--xlators/protocol/client/src/client-helpers.c162
-rw-r--r--xlators/protocol/client/src/client-lk.c419
-rw-r--r--xlators/protocol/client/src/client-mem-types.h23
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c (renamed from xlators/protocol/client/src/client3_1-fops.c)3651
-rw-r--r--xlators/protocol/client/src/client.c819
-rw-r--r--xlators/protocol/client/src/client.h157
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am20
-rw-r--r--xlators/protocol/server/src/authenticate.c109
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c210
-rw-r--r--xlators/protocol/server/src/server-helpers.c1211
-rw-r--r--xlators/protocol/server/src/server-helpers.h55
-rw-r--r--xlators/protocol/server/src/server-mem-types.h20
-rw-r--r--xlators/protocol/server/src/server-resolve.c452
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6182
-rw-r--r--xlators/protocol/server/src/server.c673
-rw-r--r--xlators/protocol/server/src/server.h116
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5234
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c528
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c1021
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h27
-rw-r--r--xlators/storage/bd/src/bd.c2449
-rw-r--r--xlators/storage/bd/src/bd.h177
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c881
-rw-r--r--xlators/storage/posix/src/posix-handle.h228
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1075
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c3536
-rw-r--r--xlators/storage/posix/src/posix.h140
-rw-r--r--xlators/system/posix-acl/src/Makefile.am12
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h24
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c32
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h44
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c546
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h71
466 files changed, 133215 insertions, 101973 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26c..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f77665802..000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c6..000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb1310..000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c8165..000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c..000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index 3310a2115..000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c85..000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 16ed25af1..ea5a90abb 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,27 +1,38 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = $(afr_common_source) afr.c
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
+
+afr_la_LDFLAGS = -module -avoid-version
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
-pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LDFLAGS = -module -avoid-version
+pump_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h pump.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 7c38d65ed..6bd231600 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -44,6 +35,7 @@
#include "compat.h"
#include "byte-order.h"
#include "statedump.h"
+#include "inode.h"
#include "fd.h"
@@ -53,719 +45,821 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heald.h"
-#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+call_frame_t *
+afr_copy_frame (call_frame_t *base)
{
- int i = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
+
+ frame = copy_frame (base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY (frame);
+ return NULL;
+ }
- for (i = 0; i < child_count; i++)
- dst[i] = src[i];
+ return frame;
}
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e priv->event_generation)
+ * when DATA and METADATA was last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
+ */
+
+int
+__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
- priv = this->private;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
+ ret = __inode_ctx_get (inode, this, &val);
+ if (ret < 0)
+ return ret;
+
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
+
+ if (event_p)
+ *event_p = event;
+ return ret;
}
+
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno)
+__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
{
- int i = 0;
- int errno_count = 0;
- int child = 0;
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (child_errno[child] == op_errno)
- errno_count++;
- }
- return errno_count;
-}
+ priv = this->private;
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
- GF_ASSERT (gfid);
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
- }
+ return __inode_ctx_set (inode, this, &val);
+}
- uuid_copy (*pgfid, gfid);
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+int
+__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
-out:
- if (ret && pgfid)
- GF_FREE (pgfid);
+ ret = __inode_ctx_get (inode, this, &val);
+ (void) ret;
- return ret;
+ metadatamap = (val & 0x000000000000ffff) >> 0;
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = 0;
+
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
+
+ return __inode_ctx_set (inode, this, &val);
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- GF_ASSERT (child_count > 0);
+ priv = this->private;
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
- }
- ret = 0;
-out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
- }
- return ctx;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small (inode, this, data,
+ metadata, event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
+
+ return ret;
}
-void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- GF_ASSERT (inode);
- GF_ASSERT (params);
+ afr_private_t *priv = NULL;
+ int ret = -1;
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- uint64_t ctx_addr = 0;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ priv = this->private;
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_GET_READ_CTX:
- fresh_children = params->u.read_ctx.children;
- read_child = (int32_t)(ctx->masks &
- AFR_ICTX_READ_CHILD_MASK);
- params->u.read_ctx.read_child = read_child;
- if (!fresh_children)
- goto unlock;
- for (i = 0; i < priv->child_count; i++)
- fresh_children[i] = ctx->fresh_children[i];
- break;
- case AFR_INODE_GET_OPENDIR_DONE:
- params->u.value = ctx->masks &
- AFR_ICTX_OPENDIR_DONE_MASK;
- break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK;
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small (inode, this, data,
+ metadata, event);
+ else
+ ret = -1;
+
+ return ret;
}
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+
+int
+__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
- afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+ int ret = -1;
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
-}
+ priv = this->private;
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
-{
- afr_inode_params_t params = {0};
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_reset_small (inode, this);
+ else
+ ret = -1;
- params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ return ret;
}
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_inode_params_t params = {0};
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get (inode, this, data,
+ metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
- params.op = AFR_INODE_GET_READ_CTX;
- params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.read_ctx.read_child;
+ return ret;
}
-void
-afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ int ret = -1;
- GF_ASSERT (read_child >= 0);
- remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
- mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = remaining_mask | mask;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set (inode, this, data, metadata,
+ event);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
-void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
- int i = 0;
+ int ret = -1;
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if (fresh_children)
- ctx->fresh_children[i] = fresh_children[i];
- else
- ctx->fresh_children[i] = -1;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_reset (inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
-void
-afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *stale_children, int32_t child_count)
+
+int
+afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
{
- int i = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type (type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
- GF_ASSERT (stale_children);
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if ((ctx->fresh_children[i] == -1) || (stale_children[i] == -1))
- break;
- afr_children_rm_child (ctx->fresh_children,
- stale_children[i], child_count);
- }
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy (pending, pending_raw, sizeof(pending));
+
+ if (ntoh32 (pending[idx]))
+ accused[i] = 1;
+ }
+
+ return 0;
}
-void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+
+int
+afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
- remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = remaining_mask | mask;
+ return 0;
}
-void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_accused = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ if (replies[i].op_ret == -1) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ afr_accused_fill (this, replies[i].xdata, data_accused,
+ (inode->ia_type == IA_IFDIR) ?
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill (this, replies[i].xdata,
+ metadata_accused, AFR_METADATA_TRANSACTION);
+
+ }
+
+ if (inode->ia_type != IA_IFDIR)
+ afr_accuse_smallfiles (this, replies, data_accused);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+
+ afr_inode_read_subvol_set (inode, this, data_readable,
+ metadata_readable, event_generation);
+ return ret;
}
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
-{
- GF_ASSERT (inode);
- GF_ASSERT (params);
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- int32_t *stale_children = NULL;
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_SET_READ_CTX:
- read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.children;
- afr_inode_ctx_set_read_ctx (ctx, read_child,
- fresh_children,
- priv->child_count);
- break;
- case AFR_INODE_RM_STALE_CHILDREN:
- read_child = params->u.read_ctx.read_child;
- stale_children = params->u.read_ctx.children;
- afr_inode_ctx_rm_stale_children (ctx, read_child,
- stale_children,
- priv->child_count);
- break;
- case AFR_INODE_SET_OPENDIR_DONE:
- afr_inode_ctx_set_opendir_done (ctx);
- break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+/*
+ * Done-callback handed to synctask_new() by afr_inode_refresh_done().
+ *
+ * Its only job is to release the call stack of the heal frame, when one was
+ * supplied.  Neither the task's return value nor the opaque pointer is used.
+ * Always returns 0.
+ */
+int
+afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
+{
+        if (heal != NULL) {
+                STACK_DESTROY (heal->root);
+        }
+
+        return 0;
}
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+/*
+ * Work out the result of an inode refresh from the collected replies.
+ *
+ * Returns 0 as soon as one child has a valid reply whose op_ret is 0.
+ * Otherwise returns the negated value of afr_final_errno().
+ */
+int
+afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            child = 0;
+
+        /* A single successful reply is enough to call the refresh a success. */
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->replies[child].valid)
+                        continue;
+                if (local->replies[child].op_ret == 0)
+                        return 0;
+        }
+
+        return -afr_final_errno (local, priv);
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
+
+/*
+ * Synctask body started by afr_inode_refresh_done(); @opaque is the frame of
+ * the operation being refreshed.
+ *
+ * Runs afr_selfheal() on the inode being refreshed, collects replies again
+ * through afr_selfheal_unlocked_discover(), interprets them, and finally
+ * reports the outcome through local->refreshfn.  Always returns 0.
+ */
+int
+afr_refresh_selfheal_wrap (void *opaque)
{
- afr_inode_params_t params = {0};
- params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
+        call_frame_t *frame  = opaque;
+        afr_local_t  *local  = frame->local;
+        xlator_t     *this   = frame->this;
+        int           status = 0;
+
+        afr_selfheal (this, local->refreshinode->gfid);
+
+        afr_selfheal_unlocked_discover (frame, local->refreshinode,
+                                        local->refreshinode->gfid,
+                                        local->replies);
+
+        afr_replies_interpret (frame, this, local->refreshinode);
+
+        /* The status has to be derived before the replies are wiped. */
+        status = afr_inode_refresh_err (frame, this);
+        afr_replies_wipe (local, this->private);
+
+        local->refreshfn (frame, this, status);
+
+        return 0;
}
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children)
+
+/*
+ * Tell whether any kind of self-heal (data, metadata or entry) is switched
+ * on for this xlator.
+ */
+gf_boolean_t
+afr_selfheal_enabled (xlator_t *this)
{
- afr_inode_params_t params = {0};
- afr_private_t *priv = NULL;
- priv = this->private;
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (fresh_children);
- GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
- read_child));
- params.op = AFR_INODE_SET_READ_CTX;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+        afr_private_t *priv         = this->private;
+        gf_boolean_t   data_enabled = _gf_false;
+
+        /*
+         * data_self_heal is parsed from its string form.  The return value
+         * of the parser is not checked: if it stores nothing, the flag
+         * simply keeps its initial _gf_false.
+         */
+        gf_string2boolean (priv->data_self_heal, &data_enabled);
+
+        if (data_enabled)
+                return _gf_true;
+        if (priv->metadata_self_heal)
+                return _gf_true;
+        if (priv->entry_self_heal)
+                return _gf_true;
+
+        return _gf_false;
}
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *stale_children)
+
+
+/*
+ * Final step of an inode refresh, run once all replies are in (or when no
+ * lookup could be sent at all).
+ *
+ * The replies are interpreted, the refresh status is derived from them and
+ * the replies are wiped.  If afr_replies_interpret() returned non-zero and
+ * self-heal is enabled, a synctask is launched that heals and then calls
+ * local->refreshfn itself (see afr_refresh_selfheal_wrap()).  In every other
+ * case - including failure to launch that task - local->refreshfn is called
+ * right here with the derived status.
+ *
+ * Always returns 0.
+ */
+int
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (stale_children);
- params.op = AFR_INODE_RM_STALE_CHILDREN;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+        call_frame_t *heal  = NULL;
+        afr_local_t  *local = NULL;
+        int           ret   = 0;
+        int           err   = 0;
+
+        local = frame->local;
+
+        ret = afr_replies_interpret (frame, this, local->refreshinode);
+
+        err = afr_inode_refresh_err (frame, this);
+
+        afr_replies_wipe (local, this->private);
+
+        if (ret && afr_selfheal_enabled (this)) {
+                heal = copy_frame (frame);
+                if (heal)
+                        heal->root->pid = -1;
+
+                ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
+                                    afr_refresh_selfheal_done, heal, frame);
+                if (!ret)
+                        /* The task now owns the rest of the refresh. */
+                        return 0;
+
+                /*
+                 * The task was never created, so afr_refresh_selfheal_done()
+                 * will not run and nobody else would release the copied
+                 * frame.  Destroy it here before falling back.
+                 */
+                if (heal)
+                        STACK_DESTROY (heal->root);
+        }
+
+        local->refreshfn (frame, this, err);
+
+        return 0;
}
-gf_boolean_t
-afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+
+/*
+ * Lookup callback for one child during an inode refresh.
+ *
+ * @cookie carries the child index (set by afr_inode_refresh_subvol()).  The
+ * reply slot of that child is marked valid and filled with op_ret/op_errno;
+ * on success (op_ret != -1) the iatts are copied and a reference on @xdata
+ * is kept.  When the last outstanding reply has arrived
+ * afr_inode_refresh_done() is invoked.
+ *
+ * Always returns 0.
+ */
+int
+afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                              int op_ret, int op_errno, inode_t *inode,
+                              struct iatt *buf, dict_t *xdata, struct iatt *par)
{
- gf_boolean_t source_xattrs = _gf_false;
- GF_ASSERT (child < child_count);
- if ((child >= 0) && (child < child_count) &&
- sources[child]) {
- source_xattrs = _gf_true;
- }
- return source_xattrs;
+        afr_local_t *local      = NULL;
+        int          call_child = (long) cookie;
+        int          call_count = 0;
+
+        local = frame->local;
+
+        local->replies[call_child].valid = 1;
+        local->replies[call_child].op_ret = op_ret;
+        local->replies[call_child].op_errno = op_errno;
+        if (op_ret != -1) {
+                local->replies[call_child].poststat = *buf;
+                local->replies[call_child].postparent = *par;
+                /*
+                 * A reply is not guaranteed to carry a dictionary (the rest
+                 * of this file treats replies[].xdata as optional), so only
+                 * take a reference when there is something to reference.
+                 */
+                local->replies[call_child].xdata =
+                        xdata ? dict_ref (xdata) : NULL;
+        }
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0)
+                afr_inode_refresh_done (frame, this);
+
+        return 0;
}
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child)
+/*
+ * Wind a lookup for @inode to child number @i as part of an inode refresh.
+ *
+ * The loc_t built here carries only the inode pointer and its gfid; no other
+ * loc field is filled in.  @xdata is passed through unchanged as the lookup
+ * request dictionary.  The child index travels as the cookie so that
+ * afr_inode_refresh_subvol_cbk() can tell which child replied.
+ *
+ * Always returns 0.
+ */
+int
+afr_inode_refresh_subvol (call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, dict_t *xdata)
{
- gf_boolean_t success_child = _gf_false;
- int i = 0;
+ loc_t loc = {0, };
+ afr_private_t *priv = NULL;
- GF_ASSERT (child < child_count);
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (child == success_children[i]) {
- success_child = _gf_true;
- break;
- }
- }
- return success_child;
+ loc.inode = inode; /* pointer is stored as-is; no extra reference is taken here */
+ uuid_copy (loc.gfid, inode->gfid);
+ /* the reply is delivered to afr_inode_refresh_subvol_cbk() with i as cookie */
+ STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
}
-gf_boolean_t
-afr_is_read_child (int32_t *success_children, int32_t *sources,
- int32_t child_count, int32_t child)
+
+/*
+ * Send the refresh lookups: one lookup for local->refreshinode to every
+ * child that is marked up in local->child_up.
+ *
+ * Stale replies are wiped first.  The request dictionary is filled by
+ * afr_xattr_req_prepare().  If the dictionary cannot be created or prepared,
+ * or if no child is up, no lookup is sent and afr_inode_refresh_done() is
+ * called directly so that local->refreshfn still gets invoked.
+ *
+ * Always returns 0; the outcome is reported through local->refreshfn.
+ */
+int
+afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
{
- gf_boolean_t success_child = _gf_false;
- gf_boolean_t source = _gf_false;
- GF_ASSERT (success_children);
- GF_ASSERT (child_count > 0);
- success_child = afr_is_child_present (success_children, child_count,
- child);
- if (!success_child)
- goto out;
- if (NULL == sources) {
- source = _gf_true;
- goto out;
- }
- source = afr_is_source_child (sources, child_count, child);
-out:
- return (success_child && source);
+        afr_local_t   *local      = NULL;
+        afr_private_t *priv       = NULL;
+        int            call_count = 0;
+        int            i          = 0;
+        dict_t        *xdata      = NULL;
+
+        priv = this->private;
+        local = frame->local;
+
+        afr_replies_wipe (local, priv);
+
+        xdata = dict_new ();
+        if (!xdata) {
+                afr_inode_refresh_done (frame, this);
+                return 0;
+        }
+
+        if (afr_xattr_req_prepare (this, xdata) != 0) {
+                dict_unref (xdata);
+                afr_inode_refresh_done (frame, this);
+                return 0;
+        }
+
+        local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+
+        call_count = local->call_count;
+        if (!call_count) {
+                /*
+                 * No child is up: nothing would be wound, so no callback
+                 * would ever fire and the refresh would never complete.
+                 * Finish it here instead.
+                 */
+                dict_unref (xdata);
+                afr_inode_refresh_done (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                afr_inode_refresh_subvol (frame, this, i, local->refreshinode,
+                                          xdata);
+
+                /*
+                 * Stop right after the last wind: the loop works on its own
+                 * copy of the count and does not look at local again once
+                 * every lookup has been sent.
+                 */
+                if (!--call_count)
+                        break;
+        }
+
+        dict_unref (xdata);
+
+        return 0;
}
-/* If sources is NULL the xattrs are assumed to be of source for all
- * success_children.
- */
+
int
-afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources)
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_inode_refresh_cbk_t refreshfn)
{
- int32_t read_child = -1;
- int i = 0;
+ afr_local_t *local = NULL;
- GF_ASSERT (success_children);
+ local = frame->local;
- read_child = prev_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ local->refreshfn = refreshfn;
- read_child = config_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ if (local->refreshinode) {
+ inode_unref (local->refreshinode);
+ local->refreshinode = NULL;
+ }
- for (i = 0; i < child_count; i++) {
- read_child = success_children[i];
- if (read_child < 0)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
+ local->refreshinode = inode_ref (inode);
-out:
- return read_child;
+ afr_inode_refresh_do (frame, this);
+
+ return 0;
}
-/* This function should be used when all the success_children are sources
- */
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child)
-{
- int read_child = -1;
- afr_private_t *priv = NULL;
- priv = this->private;
- read_child = afr_select_read_child_from_policy (fresh_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- NULL);
- if (read_child >= 0)
- afr_inode_set_read_ctx (this, inode, read_child,
- fresh_children);
-}
-
-/* afr_next_call_child ()
- * This is a common function used by all the read-type fops
- * This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child)
+/*
+ * Add the afr xattr keys to a request dictionary.
+ *
+ * One entry per child (priv->pending_key[]) plus the AFR_DIRTY entry is set,
+ * each with the value AFR_NUM_CHANGE_LOGS * sizeof(int) - presumably the
+ * size of the changelog xattr being asked for.
+ *
+ * A failure to set a per-child key is only logged.  The return value is the
+ * result of setting AFR_DIRTY alone.
+ */
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
{
- int next_index = 0;
- int32_t next_call_child = -1;
- GF_ASSERT (last_index);
- next_index = *last_index;
-retry:
- next_index++;
- if ((next_index >= child_count) ||
- (fresh_children[next_index] == -1))
- goto out;
- if ((fresh_children[next_index] == read_child) ||
- (!child_up[fresh_children[next_index]]))
- goto retry;
- *last_index = next_index;
- next_call_child = fresh_children[next_index];
-out:
- return next_call_child;
+        afr_private_t *priv      = this->private;
+        uint64_t       xattr_len = AFR_NUM_CHANGE_LOGS * sizeof (int);
+        int            child     = 0;
+        int            ret       = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                ret = dict_set_uint64 (xattr_req, priv->pending_key[child],
+                                       xattr_len);
+                if (ret >= 0)
+                        continue;
+
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Unable to set dict value for %s",
+                        priv->pending_key[child]);
+        }
+
+        ret = dict_set_uint64 (xattr_req, AFR_DIRTY, xattr_len);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "failed to set dirty "
+                        "query flag");
+        }
+
+        return ret;
}
- /* This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index)
+int /*
+ * Build local->xattr_req, the request dictionary used for a lookup.
+ *
+ * A new dictionary is created, the caller's xattr_req (if any) is copied
+ * into it, the afr keys are added through afr_xattr_req_prepare(), and the
+ * GLUSTERFS_INODELK_COUNT, GLUSTERFS_ENTRYLK_COUNT and
+ * GLUSTERFS_PARENT_ENTRYLK keys are set with value 0.
+ *
+ * loc is used only for its path in log messages.
+ *
+ * Returns -ENOMEM when the dictionary cannot be created.  Every later
+ * failure is logged at WARNING level and otherwise ignored, and 0 is
+ * returned.
+ *
+ * NOTE(review): local->xattr_req is overwritten without releasing a previous
+ * value - presumably this runs once per local; confirm against callers.
+ */
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
-
- GF_ASSERT (child_up);
- GF_ASSERT (call_child);
- GF_ASSERT (last_index);
- GF_ASSERT (fresh_children);
- GF_ASSERT (read_child >= 0);
+ int ret = -ENOMEM; /* returned as-is only when dict_new() fails below */
- priv = this->private;
- *call_child = -1;
- *last_index = -1;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
- if (child_up[read_child]) {
- *call_child = read_child;
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (fresh_children[i] == -1)
- break;
- if (child_up[fresh_children[i]]) {
- *call_child = fresh_children[i];
- ret = 0;
- break;
- }
- }
+ ret = afr_xattr_req_prepare (this, local->xattr_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to prepare xattr_req", loc->path);
+ }
- if (*call_child == -1) {
- ret = -ENOTCONN;
- goto out;
- }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
- *last_index = i;
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
}
+ /* the failures above were only logged: report success from here on */
+ ret = 0;
out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
- "last_index: %d", ret, *call_child, *last_index);
return ret;
}
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+/*
+ * Pick a child index by hashing; used by afr_read_subvol_select_by_policy()
+ * as its "hashed mode" choice.
+ *
+ * hashmode == 0: hashing is disabled and -1 is returned.
+ * hashmode == 1: the hash input is the inode's gfid (all zeroes when inode
+ *                is NULL).
+ * hashmode  > 1: as above, but the leading sizeof(pid_t) bytes of the input
+ *                are overwritten with this process's pid.
+ *
+ * Returns SuperFastHash(input) modulo child_count.
+ * NOTE(review): child_count is used as a divisor without a check -
+ * presumably callers always pass a value > 0; confirm.
+ */
+int
+afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
{
- unsigned int i = 0;
+ uuid_t gfid_copy = {0,}; /* hash input; stays all-zero when inode is NULL */
+ pid_t pid;
- if (!xattr)
- goto out;
- for (i = 0; i < child_count; i++) {
- if (xattr[i]) {
- dict_unref (xattr[i]);
- xattr[i] = NULL;
- }
+ if (!hashmode) {
+ return -1;
}
-out:
- return;
-}
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ if (inode) {
+ uuid_copy (gfid_copy, inode->gfid);
+ }
+ if (hashmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients will converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid)); /* replaces only the first sizeof(pid) bytes */
+ }
- sh = &local->self_heal;
- priv = this->private;
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
- if (sh->buf)
- GF_FREE (sh->buf);
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable)
+{
+ afr_private_t *priv = NULL;
+ int read_subvol = -1;
+ int i = 0;
- if (sh->inode)
- inode_unref (sh->inode);
+ priv = this->private;
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child (inode, priv->child_count,
+ priv->hash_mode);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
+ /* no readable subvolumes, either split brain or all subvols down */
- if (sh->sources)
- GF_FREE (sh->sources);
+ return -1;
+}
- if (sh->success)
- GF_FREE (sh->success);
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type)
+{
+ int ret = -1;
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get (inode, this, 0, readable,
+ event_p);
+ else
+ ret = afr_inode_read_subvol_get (inode, this, readable, 0,
+ event_p);
+ return ret;
+}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
- if (sh->success_children)
- GF_FREE (sh->success_children);
+/*
+ * Pick the read subvolume for @inode and a given transaction type.
+ *
+ * Children that are readable for both data and metadata are preferred: if
+ * that intersection is non-empty the policy choice is made among them,
+ * otherwise among the children readable for @type alone.
+ *
+ * The chosen index (or -1) is returned and also stored in *subvol_p when
+ * subvol_p is non-NULL; the event value obtained along the way is stored in
+ * *event_p when event_p is non-NULL.
+ */
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+                     int *event_p, afr_transaction_type type)
+{
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
- loc_wipe (&sh->parent_loc);
- loc_wipe (&sh->lookup_loc);
- if (sh->checksum)
- GF_FREE (sh->checksum);
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
+        afr_private_t *priv              = this->private;
+        unsigned char *readable          = NULL;
+        unsigned char *data_readable     = NULL;
+        unsigned char *metadata_readable = NULL;
+        unsigned char *intersection      = NULL;
+        unsigned char *candidates        = NULL;
+        int            subvol            = -1;
+        int            event             = 0;
+
+        /* one flag byte per child for each of the four sets */
+        readable = alloca0 (priv->child_count);
+        data_readable = alloca0 (priv->child_count);
+        metadata_readable = alloca0 (priv->child_count);
+        intersection = alloca0 (priv->child_count);
+
+        afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
+
+        afr_inode_read_subvol_get (inode, this, data_readable,
+                                   metadata_readable, &event);
+
+        AFR_INTERSECT (intersection, data_readable, metadata_readable,
+                       priv->child_count);
+
+        candidates = readable;
+        if (AFR_COUNT (intersection, priv->child_count) > 0)
+                candidates = intersection;
+
+        subvol = afr_read_subvol_select_by_policy (inode, this, candidates);
+
+        if (subvol_p != NULL)
+                *subvol_p = subvol;
+        if (event_p != NULL)
+                *event_p = event;
+
+        return subvol;
}
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
+ afr_matrix_cleanup (local->pending, priv->child_count);
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
-
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
GF_FREE (local->transaction.eager_lock);
GF_FREE (local->transaction.basename);
@@ -773,8 +867,37 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
+}
+
+
+/*
+ * Reset local->replies: drop the reference on every reply's xdata and zero
+ * all priv->child_count slots.  Does nothing when local->replies is NULL.
+ * The array itself is not freed.
+ */
+void
+afr_replies_wipe (afr_local_t *local, afr_private_t *priv)
+{
+        struct afr_reply *reply = NULL;
+        int               child = 0;
+
+        if (local->replies == NULL)
+                return;
+
+        for (child = 0; child < priv->child_count; child++) {
+                reply = &local->replies[child];
+                if (reply->xdata == NULL)
+                        continue;
+
+                dict_unref (reply->xdata);
+                reply->xdata = NULL;
+        }
+
+        memset (local->replies, 0, sizeof(*local->replies) * priv->child_count);
}
+/*
+ * Unlink this local's transaction from the eager_locked list it sits on.
+ * The removal is done while holding the lock of local->fd, and the list node
+ * is re-initialised so that it reads as empty afterwards.
+ */
+void
+afr_remove_eager_lock_stub (afr_local_t *local)
+{
+        LOCK (&local->fd->lock);
+        list_del_init (&local->transaction.eager_locked);
+        UNLOCK (&local->fd->lock);
+}
void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
@@ -784,7 +907,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (!local)
return;
- afr_local_sh_cleanup (local, this);
+ syncbarrier_destroy (&local->barrier);
+
+ if (local->transaction.eager_lock_on &&
+ !list_empty (&local->transaction.eager_locked))
+ afr_remove_eager_lock_stub (local);
afr_local_transaction_cleanup (local, this);
@@ -799,52 +926,36 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->xattr_req)
dict_unref (local->xattr_req);
- if (local->child_up)
- GF_FREE (local->child_up);
+ if (local->dict)
+ dict_unref (local->dict);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ afr_replies_wipe (local, priv);
+ GF_FREE(local->replies);
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ GF_FREE (local->child_up);
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- afr_reset_xattr (local->cont.lookup.xattrs,
- priv->child_count);
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+ GF_FREE (local->read_attempted);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
+ GF_FREE (local->readable);
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ if (local->parent)
+ inode_unref (local->parent);
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ if (local->parent2)
+ inode_unref (local->parent2);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
-
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
- }
+ if (local->refreshinode)
+ inode_unref (local->refreshinode);
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -878,18 +989,40 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
dict_unref (local->cont.setxattr.dict);
}
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
{ /* removexattr */
GF_FREE (local->cont.removexattr.name);
}
-
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
{ /* symlink */
GF_FREE (local->cont.symlink.linkpath);
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
}
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
}
@@ -910,1383 +1043,1061 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-int
-afr_set_elem_count_get (unsigned char *elems, int child_count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (elems[i])
- ret++;
- return ret;
-}
-
-/**
- * up_children_count - return the number of children that are up
- */
-
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count)
-{
- return afr_set_elem_count_get (child_up, child_count);
-}
-
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count)
-{
- return afr_set_elem_count_get (children, child_count);
-}
-
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count)
-{
- return afr_set_elem_count_get (pre_op, child_count);
-}
+/*
+ * Return _gf_true when at least one reply's xdata carries a non-zero
+ * GLUSTERFS_PARENT_ENTRYLK value, _gf_false otherwise.  Replies without an
+ * xdata, or without that key, are skipped.
+ */
gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this)
{
- uint64_t ctx = 0;
- int32_t ret = 0;
- GF_ASSERT (loc);
- GF_ASSERT (this);
- GF_ASSERT (loc->inode);
-
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (0 == ret)
- return _gf_false;
- return _gf_true;
-}
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
- uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- uuid_copy (loc->pargfid, postparent->ia_gfid);
+        afr_private_t *priv       = this->private;
+        dict_t        *xdata      = NULL;
+        int            lock_count = 0;
+        int            child      = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                xdata = local->replies[child].xdata;
+                if (xdata == NULL)
+                        continue;
+
+                /* the value is looked at only when the key could be read */
+                if (dict_get_int32 (xdata, GLUSTERFS_PARENT_ENTRYLK,
+                                    &lock_count) != 0)
+                        continue;
+
+                if (lock_count != 0)
+                        return _gf_true;
+        }
+
+        return _gf_false;
}
-int
-afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
-{
- int32_t read_child = -1;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- dict_t **xattr = NULL;
- int ret = 0;
-
- GF_ASSERT (local);
-
- buf = &local->cont.lookup.buf;
- postparent = &local->cont.lookup.postparent;
- xattr = &local->cont.lookup.xattr;
- read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- NULL);
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
- read_child);
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
- *buf = local->cont.lookup.bufs[read_child];
- *postparent = local->cont.lookup.postparents[read_child];
-
- if (IA_INVAL == local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type = buf->ia_type;
- }
-out:
- return ret;
-}
+/*
+ * Quota size xattrs are not maintained by afr, so they can differ between
+ * subvolumes even when the directory changelog xattrs say everything is
+ * fine.  To hand out one consistent value, take the maximum quota size and
+ * write it into the replies:
+ *   - if at least one subvolume is readable (a 'source'), only the sources
+ *     are considered, so a stale copy never outvotes a source;
+ *   - otherwise all successful replies are considered.
+ * Nothing is changed when no quota size (or only 0) is found.
+ */
static void
-afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = -1;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
- GF_ASSERT (child_index >= 0);
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
+afr_handle_quota_size (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local        = frame->local;
+        afr_private_t    *priv         = this->private;
+        struct afr_reply *replies      = local->replies;
+        unsigned char    *readable     = NULL;
+        uint64_t          size         = 0;
+        uint64_t          max_size     = 0;
+        int               readable_cnt = 0;
+        int               i            = 0;
+
+        readable = alloca0 (priv->child_count);
+
+        afr_inode_read_subvol_get (local->inode, this, readable, 0, 0);
+
+        readable_cnt = AFR_COUNT (readable, priv->child_count);
+
+        /* Pass 1: find the largest quota size among the eligible replies. */
+        for (i = 0; i < priv->child_count; i++) {
+                if (!replies[i].valid || replies[i].op_ret == -1 ||
+                    (readable_cnt && !readable[i]) || !replies[i].xdata)
+                        continue;
+
+                if (dict_get_uint64 (replies[i].xdata, QUOTA_SIZE_KEY,
+                                     &size) == 0 &&
+                    size > max_size)
+                        max_size = size;
+        }
+
+        if (max_size == 0)
+                return;
+
+        /* Pass 2: store that maximum in every eligible reply; a failure to
+         * store it is ignored. */
+        for (i = 0; i < priv->child_count; i++) {
+                if (!replies[i].valid || replies[i].op_ret == -1 ||
+                    (readable_cnt && !readable[i]) || !replies[i].xdata)
+                        continue;
+
+                (void) dict_set_uint64 (replies[i].xdata, QUOTA_SIZE_KEY,
+                                        max_size);
+        }
}
-static void
-afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
-
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
-
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
-
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
- }
-}
static void
-afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
- struct iatt *buf, struct iatt *lookup_buf)
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ unsigned char *readable = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {0, };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
- "ownership differs for %s ", local->loc.path);
- }
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
- "size differs for %s ", local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
+ locked_entry = afr_is_entry_possibly_under_txn (local, this);
- if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
- /* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume", local->loc.path);
- }
-}
+ readable = alloca0 (priv->child_count);
-static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
+ afr_inode_read_subvol_get (local->loc.parent, this, readable,
+ NULL, &event);
- if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_entry_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "entries are missing in lookup of %s.",
- local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
- }
-
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
-
-out:
- return;
+ /* First, check if we have an ESTALE from somewhere,
+ If so, propagate that so that a revalidate can be
+ issued
+ */
+ op_errno = afr_final_errno (frame->local, this->private);
+ local->op_errno = op_errno;
+ if (op_errno == ESTALE) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ goto unwind;
+ }
+
+ read_subvol = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (locked_entry && replies[i].op_ret == -1 &&
+ replies[i].op_errno == ENOENT) {
+ /* Second, check entry is still
+ "underway" in creation */
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ read_subvol = i;
+ goto unwind;
+ }
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
+ local->op_ret = 0;
+ }
+ }
+
+ if (read_subvol == -1)
+ goto unwind;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ if (priv->child_up[i])
+ can_interpret = _gf_false;
+ continue;
+ }
+
+ if (!uuid_compare (replies[i].poststat.ia_gfid,
+ read_gfid))
+ continue;
+
+ can_interpret = _gf_false;
+
+ if (locked_entry)
+ continue;
+
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
+
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto unwind;
+ }
+
+ /* Fourth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ if (can_interpret) {
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ if (afr_replies_interpret (frame, this, local->inode)) {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ afr_inode_read_subvol_reset (local->inode, this);
+ goto cant_interpret;
+ } else {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ }
+ } else {
+ cant_interpret:
+ if (read_subvol == -1)
+ dict_del (replies[0].xdata, GF_CONTENT_KEY);
+ else
+ dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+
+ afr_handle_quota_size (frame, this);
+
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
+
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ENODATA > ESTALE > ENOENT > others
+ */
+
+int
+afr_higher_errno (int32_t old_errno, int32_t new_errno)
{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
+ return new_errno;
}
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type)
+
+int
+afr_final_errno (afr_local_t *local, afr_private_t *priv)
{
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
- GF_ASSERT (ia_type != IA_INVAL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno (op_errno, tmp_errno);
+ }
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
- return type;
+ return op_errno;
}
-int
-afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
- int32_t *read_child)
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
- ia_type_t ia_type = IA_INVAL;
- int32_t source = -1;
- int ret = -1;
- dict_t **xattrs = NULL;
- int32_t *success_children = NULL;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (local->success_count > 0);
+ if (!pathinfo)
+ goto out;
- success_children = local->cont.lookup.success_children;
- /*We can take the success_children[0] only because we already
- *handle the conflicting children other wise, we could select the
- *read_child based on wrong file type
- */
- ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- type = afr_transaction_type_get (ia_type);
- xattrs = local->cont.lookup.xattrs;
- source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type);
- if (source < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
- "for %s", local->loc.path);
+ start = strchr (pathinfo, ':');
+ if (!start)
+ goto out;
+ end = strrchr (pathinfo, ':');
+ if (start == end)
goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
- source, local->loc.path);
- *read_child = source;
+ memset (hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
ret = 0;
out:
return ret;
}
-static inline gf_boolean_t
-afr_is_transaction_running (afr_local_t *local)
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
{
- GF_ASSERT (local->fop == GF_FOP_LOOKUP);
- return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
-}
-
-void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno))
-{
- afr_local_t *local = NULL;
- char sh_type_str[256] = {0,};
- char *bg = "";
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (inode);
- GF_ASSERT (ia_type != IA_INVAL);
-
- local = frame->local;
- local->self_heal.background = background;
- local->self_heal.type = ia_type;
- local->self_heal.unwind = unwind;
- local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+ xlator_t *this = THIS;
- afr_self_heal_type_str_get (&local->self_heal,
- sh_type_str,
- sizeof (sh_type_str));
+ *local = _gf_false;
+ ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+ pathinfo);
+ goto out;
+ }
- if (background)
- bg = "background";
- gf_log (this->name, GF_LOG_INFO,
- "%s %s self-heal triggered. path: %s, reason: %s", bg,
- sh_type_str, local->loc.path, reason);
+ ret = gethostname (localhost, sizeof (localhost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+ "reason: %s", strerror (errno));
+ goto out;
+ }
- afr_self_heal (frame, this, inode);
+ if (!strcmp (localhost, pathinfohost))
+ *local = _gf_true;
+out:
+ return ret;
}
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
- struct iatt *bufs, unsigned int child_count,
- const char *path)
+static int32_t
+afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- unsigned int gfid_miss_count = 0;
- int i = 0;
- struct iatt *child1 = NULL;
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if (uuid_is_null (child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
- " on subvolume %d", path, success_children[i]);
- gfid_miss_count++;
- }
+ if (op_ret != 0) {
+ goto out;
}
- return gfid_miss_count;
-}
-
-static int
-afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
-{
- int32_t *success_children = NULL;
- afr_private_t *priv = NULL;
- struct iatt *bufs = NULL;
- int miss_count = 0;
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
- miss_count = afr_gfid_missing_count (this->name, success_children,
- bufs, priv->child_count,
- local->loc.path);
- return miss_count;
-}
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name)
-{
- gf_boolean_t conflicting = _gf_false;
- int i = 0;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- uuid_t *gfid = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
- gfid = &child1->ia_gfid;
-
- if (i == 0)
- continue;
-
- child2 = &bufs[success_children[i-1]];
- if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- if (!gfid || uuid_is_null (child1->ia_gfid))
- continue;
- if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
- " on subvolume %d", path, success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
}
out:
- return conflicting;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-/* afr_update_gfid_from_iatts: This function should be called only if the
- * iatts are not conflicting.
- */
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children, unsigned int child_count)
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
{
- uuid_t *gfid = NULL;
- int i = 0;
- int child = 0;
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
- gfid = &bufs[child].ia_gfid;
- } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
- if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
- GF_ASSERT (0);
- goto out;
- }
- }
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
}
- if (gfid && (!uuid_is_null (*gfid)))
- uuid_copy (uuid, *gfid);
-out:
- return;
-}
-static gf_boolean_t
-afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- gf_boolean_t conflict = _gf_false;
-
- priv = this->private;
- conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name);
- return conflict;
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
}
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal)
-{
- return !strcmp (data_self_heal, "open");
-}
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal)
+int
+afr_lookup_selfheal_wrap (void *opaque)
{
- gf_boolean_t enabled = _gf_false;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
- if (gf_string2boolean (data_self_heal, &enabled) == -1) {
- enabled = !strcmp (data_self_heal, "open");
- GF_ASSERT (enabled);
- }
+ local = frame->local;
+ this = frame->this;
- return enabled;
-}
+ afr_selfheal_name (frame->this, local->loc.pargfid, local->loc.name);
-static void
-afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- struct iatt *bufs = NULL;
- dict_t **xattr = NULL;
- afr_private_t *priv = NULL;
- int32_t child1 = -1;
- int32_t child2 = -1;
- afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
+ afr_replies_wipe (local, this->private);
- priv = this->private;
- sh = &local->self_heal;
-
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up);
+ if (inode)
+ inode_unref (inode);
+ afr_lookup_done (frame, this);
- if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.do_gfid_self_heal = _gf_true;
+ return 0;
+}
- if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- else
- afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
- local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count);
- bufs = local->cont.lookup.bufs;
- for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i-1];
- child2 = local->cont.lookup.success_children[i];
- afr_detect_self_heal_by_iatt (local, this,
- &bufs[child1], &bufs[child2]);
- }
-
- xattr = local->cont.lookup.xattrs;
- for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_params_by_xattr (local, this,
- xattr[child1]);
- }
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
- sh->do_data_self_heal = _gf_false;
+int
+afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t need_heal = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (first == -1) {
+ first = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first].op_ret) {
+ need_heal = _gf_true;
+ break;
+ }
+
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first].poststat.ia_gfid)) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
+
+ if (need_heal) {
+ heal = copy_frame (frame);
+ if (heal)
+ heal->root->pid = -1;
+ ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto lookup_done;
+ } else {
+ lookup_done:
+ afr_lookup_done (frame, this);
+ }
+
+ return ret;
}
+
int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- afr_local_t *local = NULL;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local = frame->local;
+ child_index = (long) cookie;
- if (op_ret == -1) {
- local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local = frame->local;
- goto out;
- } else {
- local->op_ret = 0;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- afr_lookup_done_success_action (frame, this, _gf_true);
-out:
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_entry_heal (frame, this);
+ }
- return 0;
+ return 0;
}
-//TODO: At the moment only lookup needs this, so not doing any checks, in the
-// future we will have to do fop specific operations
-void
-afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_local_t *sh_local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- struct iatt *lookup_bufs = NULL;
- struct iatt *lookup_parentbufs = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- local = sh->orig_frame->local;
- lookup_bufs = local->cont.lookup.bufs;
- lookup_parentbufs = local->cont.lookup.postparents;
- priv = this->private;
-
- memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
- memcpy (lookup_parentbufs, sh->parentbufs,
- priv->child_count * sizeof (*sh->parentbufs));
-
- afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- local->cont.lookup.xattr = NULL;
- }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
- }
-
- afr_reset_children (local->cont.lookup.success_children,
- priv->child_count);
- afr_children_copy (local->cont.lookup.success_children,
- sh->fresh_children, priv->child_count);
-}
static void
-afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
+afr_discover_done (call_frame_t *frame, xlator_t *this)
{
- unsigned int up_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- char *reason = NULL;
-
- GF_ASSERT (sh_launched);
- *sh_launched = _gf_false;
- priv = this->private;
- local = frame->local;
-
- up_count = afr_up_children_count (local->child_up, priv->child_count);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
- afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
- goto out;
+ priv = this->private;
+ local = frame->local;
- reason = "lookup detected pending operations";
- afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- _gf_true, local->cont.lookup.buf.ia_type,
- reason, afr_post_gfid_sh_success,
- afr_self_heal_lookup_unwind);
- *sh_launched = _gf_true;
- }
-out:
- return;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
+ }
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count)
-{
- unsigned int i = 0;
- unsigned int j = 0;
+ op_errno = afr_final_errno (frame->local, this->private);
- GF_ASSERT (success_children);
- GF_ASSERT (sources);
- GF_ASSERT (fresh_children);
+ if (local->op_ret < 0) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ goto unwind;
+ }
- afr_reset_children (fresh_children, child_count);
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- success_children[i])) {
- fresh_children[j] = success_children[i];
- j++;
- }
- }
-}
+ afr_replies_interpret (frame, this, local->inode);
-static int
-afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
-{
- afr_private_t *priv = NULL;
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+ if (read_subvol == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
+ local->loc.path);
- GF_ASSERT (read_child >= 0);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid ||
+ local->replies[i].op_ret == -1)
+ continue;
+ read_subvol = i;
+ break;
+ }
+ }
- priv = this->private;
- afr_get_fresh_children (local->cont.lookup.success_children,
- local->cont.lookup.sources,
- local->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
- local->fresh_children);
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
- return 0;
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict)
-{
- int32_t read_child = -1;
- int32_t ret = -1;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (local->loc.parent == NULL)
- fail_conflict = _gf_true;
-
- if (afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name)) {
- if (fail_conflict == _gf_false)
- ret = 0;
- goto out;
- }
- if (!afr_is_transaction_running (local)) {
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret)
- goto out;
-
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret)
- goto out;
- }
-
- ret = afr_lookup_build_response_params (local, this);
- if (ret)
- goto out;
- if (afr_is_fresh_lookup (&local->loc, this)) {
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
- }
-
- ret = 0;
-out:
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
- return ret;
-}
-
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
+int
+afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- int unwind = 1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t sh_launched = _gf_false;
- int gfid_miss_count = 0;
- int enotconn_count = 0;
- int up_children_count = 0;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- priv = this->private;
- local = frame->local;
+ child_index = (long) cookie;
- if (local->op_ret < 0)
- goto unwind;
- gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
- enotconn_count = priv->child_count - up_children_count;
- if ((gfid_miss_count == local->success_count) &&
- (enotconn_count > 0)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
- "LOOKUP on a file without gfid is not allowed when "
- "some of the children are down", local->loc.path);
- goto unwind;
- }
+ local = frame->local;
- ret = afr_lookup_done_success_action (frame, this, _gf_false);
- if (ret)
- goto unwind;
- uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- afr_lookup_perform_self_heal (frame, this, &sh_launched);
- if (sh_launched) {
- unwind = 0;
- goto unwind;
- }
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery (this, child_index);
- unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_discover_done (frame, this);
}
+
+ return 0;
}
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
+int
+afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
{
- gf_boolean_t ret = _gf_true;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+ local = frame->local;
+ priv = this->private;
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
+ if (err) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
- return ret;
-}
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count)
-{
- int i = 0;
- int32_t op_errno = 0;
- int child = 0;
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_discover_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
break;
- } else {
- child = i;
}
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
}
- return op_errno;
+
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+
+int
+afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- GF_ASSERT (local);
- if (op_errno == ENOENT)
- local->enoent_count++;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ priv = this->private;
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-}
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
-static void
-afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
- inode_t *inode)
-{
- afr_private_t *priv = NULL;
- GF_ASSERT (inode);
-
- if (inode->ino != 1)
- goto out;
- if (!afr_is_fresh_lookup (&local->loc, this))
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
goto out;
- priv = this->private;
- if ((priv->first_lookup)) {
- gf_log (this->name, GF_LOG_INFO, "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
}
-out:
- return;
-}
-
-static void
-afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
- struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (child_index >= 0);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparents[child_index] = *postparent;
- local->cont.lookup.bufs[child_index] = *buf;
-}
-static void
-afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
- inode_t *inode, struct iatt *buf)
-{
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.buf = *buf;
- afr_set_root_inode_on_first_lookup (local, this, inode);
-}
+ if (__is_root_gfid (loc->inode->gfid)) {
+ if (!this->itable)
+ this->itable = loc->inode->table;
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref (loc->inode);
-static void
-afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
- local->op_ret = op_ret;
- local->op_errno = 0;
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
}
- afr_lookup_handle_first_success (local, this, inode, buf);
- }
- afr_lookup_update_lk_counts (local, this,
- child_index, xattr);
+ }
- afr_lookup_cache_args (local, child_index, xattr,
- buf, postparent);
- local->cont.lookup.success_children[local->success_count] = child_index;
- local->success_count++;
-}
+ local->op = GF_FOP_LOOKUP;
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ loc_copy (&local->loc, loc);
- child_index = (long) cookie;
+ local->inode = inode_ref (loc->inode);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
- goto unlock;
- }
- afr_lookup_handle_success (local, this, child_index, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
+ if (uuid_is_null (loc->inode->gfid)) {
+ afr_discover_do (frame, this, 0);
+ return 0;
+ }
- }
-unlock:
- UNLOCK (&frame->lock);
+ afr_read_subvol_get (loc->inode, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_done (frame, this);
- }
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
+ else
+ afr_discover_do (frame, this, 0);
- return 0;
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
+
int
-afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
{
- int ret = -ENOMEM;
- struct iatt *iatts = NULL;
- int32_t *success_children = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- GF_ASSERT (local);
- local->cont.lookup.xattrs = GF_CALLOC (child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
- if (NULL == local->cont.lookup.xattrs)
- goto out;
+ local = frame->local;
+ priv = this->private;
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.postparents = iatts;
-
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.bufs = iatts;
+ if (err < 0) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
- success_children = afr_children_create (child_count);
- if (NULL == success_children)
- goto out;
- local->cont.lookup.success_children = success_children;
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
- local->fresh_children = afr_children_create (child_count);
- if (NULL == local->fresh_children)
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
+ }
- ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
out:
- return ret;
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element being looked up is,
+ * i.e. what its GFID and inode type are, along with a conservative
+ * estimate of its inode attributes.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or be in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief outline of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
- priv = this->private;
+int
+afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (!loc->parent) {
+ afr_discover (frame, this, loc, xattr_req);
+ return 0;
+ }
- local->op_ret = -1;
+ if (__is_root_gfid (loc->parent->gfid)) {
+ if (!strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) {
+ op_errno = EPERM;
+ goto out;
+ }
+ }
- frame->local = local;
- local->fop = GF_FOP_LOOKUP;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
goto out;
}
+ local->op = GF_FOP_LOOKUP;
+
loc_copy (&local->loc, loc);
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ local->inode = inode_ref (loc->inode);
- local->read_child_index = afr_inode_get_read_ctx (this,
- loc->inode,
- NULL);
- } else {
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
- }
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
+ afr_read_subvol_get (loc->parent, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
- local->child_up = memdup (priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- if (NULL == local->child_up) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
+ else
+ afr_lookup_do (frame, this, 0);
- ret = afr_lookup_cont_init (local, priv->child_count);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- call_count = local->call_count;
+ return 0;
+}
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
+/* {{{ open */
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ ret = __fd_ctx_get (fd, this, &ctx);
- ret = dict_get_ptr (local->xattr_req, "gfid-req", &gfid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get the gfid from dict");
- } else {
- uuid_copy (local->cont.lookup.gfid_req, gfid_req);
- if (local->loc.parent)
- dict_del (local->xattr_req, "gfid-req");
- }
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
}
- ret = 0;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
out:
- if (ret == -1)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
-
- return 0;
+ return fd_ctx;
}
-/* {{{ open */
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
+
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
afr_private_t * priv = NULL;
int ret = -1;
uint64_t ctx = 0;
afr_fd_ctx_t * fd_ctx = NULL;
+ int i = 0;
VALIDATE_OR_GOTO (this->private, out);
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto unlock;
-
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto unlock;
- }
+ ret = __fd_ctx_get (fd, this, &ctx);
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto unlock;
- }
+ if (ret == 0)
+ goto out;
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ fd_ctx->pre_op_done[i] = GF_CALLOC (sizeof (*fd_ctx->pre_op_done[i]),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous (fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
- INIT_LIST_HEAD (&fd_ctx->entries);
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
- }
-unlock:
- UNLOCK (&fd->lock);
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
out:
return ret;
}
-/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ int ret = -1;
- LOCK (&frame->lock);
+ LOCK (&fd->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
+ ret = __afr_fd_ctx_set (this, fd);
}
+ UNLOCK (&fd->lock);
- return 0;
+ return ret;
}
+/* {{{ flush */
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ int call_count = -1;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, local->xdata_rsp);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd);
-
+ local->fd, xdata);
if (!--call_count)
break;
+
}
}
return 0;
}
-
int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- local = frame->local;
-
- local->transaction.unwind (frame, this);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- AFR_STACK_DESTROY (frame);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- return 0;
-}
+ local->fd = fd_ref(fd);
-
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
- int call_count = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
goto out;
- }
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_delayed_changelog_wake_resume (this, fd, stub);
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
-
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
-
+ AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2299,8 +2110,7 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
+ int i = 0;
ret = fd_ctx_get (fd, this, &ctx);
if (ret < 0)
@@ -2309,28 +2119,22 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ //no need to take any locks
+ if (!list_empty (&fd_ctx->eager_locked))
+ gf_log (this->name, GF_LOG_WARNING, "%s: Stale "
+ "Eager-lock stubs found",
+ uuid_utoa (fd->inode->gfid));
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++)
+ GF_FREE (fd_ctx->pre_op_done[i]);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ GF_FREE (fd_ctx->lock_acquired);
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
GF_FREE (fd_ctx);
}
@@ -2343,24 +2147,8 @@ out:
int
afr_release (xlator_t *this, fd_t *fd)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
afr_cleanup_fd_ctx (this, fd);
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
-
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
-
- }
-
return 0;
}
@@ -2368,51 +2156,87 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
int child_index = (long) cookie;
- int read_child = 0;
+ int read_subvol = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
LOCK (&frame->lock);
{
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
if (op_ret == 0) {
- local->op_ret = 0;
+ if (local->op_ret == -1) {
+ local->op_ret = 0;
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ if (child_index == read_subvol) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ }
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -2420,36 +2244,34 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->fd = fd_ref (fd);
- call_count = local->call_count;
- frame->local = local;
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
- local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
+ local->inode = inode_ref (fd->inode);
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -2457,17 +2279,16 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
@@ -2475,9 +2296,9 @@ out:
/* {{{ fsync */
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2486,10 +2307,13 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
local->op_ret = 0;
-
- local->op_errno = op_errno;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
@@ -2497,57 +2321,49 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, local->xdata_rsp);
return 0;
}
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+int
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fsyncdir_cbk,
priv->children[i],
priv->children[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+
return 0;
}
@@ -2558,7 +2374,7 @@ out:
int32_t
afr_xattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2567,8 +2383,15 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2578,7 +2401,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.xattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -2586,49 +2409,41 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_xattrop_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
- loc, optype, xattr);
+ loc, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -2639,7 +2454,7 @@ out:
int32_t
afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2649,8 +2464,14 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2660,7 +2481,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.fxattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -2668,49 +2489,41 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fxattrop_cbk,
priv->children[i],
priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
+ fd, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -2718,8 +2531,8 @@ out:
int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2740,7 +2553,7 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2748,57 +2561,50 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_inodelk_cbk,
priv->children[i],
priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
return 0;
}
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2819,66 +2625,57 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
+/*
+ * afr_finodelk: fd-based counterpart of afr_inodelk. Winds finodelk to
+ * every subvolume flagged in local->child_up, stopping once call_count
+ * winds have been issued. On failure (no frame-local, or call_count of
+ * zero -> ENOTCONN) the fop is unwound with -1 and op_errno.
+ */
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_finodelk_cbk,
priv->children[i],
priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
return 0;
}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2898,67 +2695,59 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+/*
+ * afr_entrylk: wind the entrylk request to every subvolume flagged in
+ * local->child_up, with afr_entrylk_cbk as the callback for each wind.
+ * On failure the fop is unwound with -1 and op_errno.
+ */
+int
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ /* start from ENOMEM like the sibling fops so a failed unwind never carries errno 0 */
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_entrylk_cbk,
priv->children[i],
priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2979,156 +2768,148 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+/*
+ * afr_fentrylk: fd-based counterpart of afr_entrylk. Winds fentrylk to
+ * every subvolume flagged in local->child_up, stopping once call_count
+ * winds have been issued. On failure (no frame-local, or call_count of
+ * zero -> ENOTCONN) the fop is unwound with -1 and op_errno.
+ */
+int
+afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fentrylk_cbk,
priv->children[i],
priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+
+/*
+ * afr_statfs_cbk: callback for each statfs wind. Under frame->lock, a
+ * non-zero op_ret only records op_errno. A successful reply records
+ * op_ret and is kept in local->cont.statfs.buf when it is the first one
+ * or when its f_bavail is smaller than the one kept so far; in that case
+ * a non-NULL xdata replaces local->xdata_rsp. Once afr_frame_return()
+ * reports zero, the kept buffer and xdata_rsp are unwound to the caller.
+ */
+int
+afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
+ struct statvfs *buf = NULL;
LOCK (&frame->lock);
{
local = frame->local;
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
+ if (op_ret != 0) {
local->op_errno = op_errno;
-
+ goto unlock;
+ }
+
+ local->op_ret = op_ret;
+
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ /* keep the reply with the smallest f_bavail, together with its xdata */
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ }
+ } else {
+ /* first successful reply: take it unconditionally */
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ &local->cont.statfs.buf, local->xdata_rsp);
return 0;
}
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+/*
+ * afr_statfs: wind statfs to every subvolume flagged in local->child_up;
+ * afr_statfs_cbk is the callback for each wind. On failure (no
+ * frame-local, or call_count of zero -> ENOTCONN) the fop is unwound
+ * with -1, op_errno and NULL for both the buffer and xdata.
+ */
+int
+afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
int i = 0;
- int ret = -1;
int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ int32_t op_errno = ENOMEM;
- priv = this->private;
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ priv = this->private;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- frame->local = local;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
priv->children[i]->fops->statfs,
- loc);
+ loc, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
int32_t
afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -3138,7 +2919,7 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lock, xdata);
return 0;
}
@@ -3160,7 +2941,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
return 0;
}
@@ -3174,7 +2955,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, NULL);
if (!--call_count)
break;
@@ -3187,7 +2968,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -3222,30 +3003,15 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
} else {
- /* locking has succeeded on all nodes that are up */
-
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
-
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
-
- */
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
}
return 0;
@@ -3254,24 +3020,18 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
sizeof (*local->cont.lk.locked_nodes),
@@ -3290,32 +3050,18 @@ afr_lk (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
+/*
+ * afr_forget: after this change the function does no per-inode cleanup
+ * and simply returns 0; the removed lines used to free the inode context
+ * and its fresh_children array.
+ */
int
afr_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- inode_ctx_get (inode, this, &ctx_addr);
-
- if (!ctx_addr)
- goto out;
-
- ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-out:
return 0;
}
@@ -3335,7 +3081,6 @@ afr_priv_dump (xlator_t *this)
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
gf_proc_dump_add_section(key_prefix);
gf_proc_dump_write("child_count", "%u", priv->child_count);
- gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
for (i = 0; i < priv->child_count; i++) {
sprintf (key, "child_up[%d]", i);
gf_proc_dump_write(key, "%d", priv->child_up[i]);
@@ -3350,11 +3095,6 @@ afr_priv_dump (xlator_t *this)
gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
gf_proc_dump_write("read_child", "%d", priv->read_child);
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
- gf_proc_dump_write("data_lock_server_count", "%u", priv->data_lock_server_count);
- gf_proc_dump_write("metadata_lock_server_count", "%u",
- priv->metadata_lock_server_count);
- gf_proc_dump_write("entry_lock_server_count", "%u",
- priv->entry_lock_server_count);
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
return 0;
@@ -3385,7 +3125,7 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
@@ -3397,13 +3137,23 @@ afr_notify (xlator_t *this, int32_t event,
int idx = -1;
int ret = -1;
int call_psh = 0;
- int up_child = AFR_ALL_CHILDREN;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
if (!priv)
return 0;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
had_heard_from_all = 1;
for (i = 0; i < priv->child_count; i++) {
if (!priv->last_event[i]) {
@@ -3414,7 +3164,7 @@ afr_notify (xlator_t *this, int32_t event,
/* parent xlators dont need to know about every child_up, child_down
* because of afr ha. If all subvolumes go down, child_down has
* to be triggered. In that state when 1 subvolume comes up child_up
- * needs to be triggered. dht optimises revalidate lookup by sending
+ * needs to be triggered. dht optimizes revalidate lookup by sending
* it only to one of its subvolumes. When child up/down happens
* for afr's subvolumes dht should be notified by child_modified. The
* subsequent revalidate lookup happens on all the dht's subvolumes
@@ -3431,9 +3181,20 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_UP:
LOCK (&priv->lock);
{
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ priv->event_generation++;
+ }
priv->child_up[idx] = 1;
- priv->up_count++;
+ call_psh = 1;
+ up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3443,12 +3204,6 @@ afr_notify (xlator_t *this, int32_t event,
"going online.", ((xlator_t *)data)->name);
} else {
event = GF_EVENT_CHILD_MODIFIED;
- gf_log (this->name, GF_LOG_INFO, "subvol %d came up, "
- "start crawl", idx);
- if (had_heard_from_all) {
- call_psh = 1;
- up_child = idx;
- }
}
priv->last_event[idx] = event;
@@ -3460,8 +3215,23 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_DOWN:
LOCK (&priv->lock);
{
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between that
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ priv->event_generation++;
+ }
priv->child_up[idx] = 0;
- priv->down_count++;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
@@ -3489,10 +3259,15 @@ afr_notify (xlator_t *this, int32_t event,
break;
- case GF_EVENT_TRIGGER_HEAL:
- gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
- " manually. Start crawling");
- call_psh = 1;
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ if (!had_heard_from_all) {
+ ret = -1;
+ goto out;
+ }
+ ret = afr_xl_op (this, input, output);
+ goto out;
break;
default:
@@ -3521,8 +3296,7 @@ afr_notify (xlator_t *this, int32_t event,
LOCK (&priv->lock);
{
- up_children = afr_up_children_count (priv->child_up,
- priv->child_count);
+ up_children = AFR_COUNT (priv->child_up, priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
@@ -3537,65 +3311,75 @@ afr_notify (xlator_t *this, int32_t event,
}
}
UNLOCK (&priv->lock);
- if (up_children > 1) {
- gf_log (this->name, GF_LOG_INFO, "All subvolumes came "
- "up, start crawl");
- call_psh = 1;
- }
}
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh)
- afr_proactive_self_heal (this, up_child);
+ if (call_psh && priv->shd.iamshd) {
+ afr_selfheal_childup (this, up_child);
+ }
out:
return ret;
}
-int
-afr_first_up_child (unsigned char *child_up, size_t child_count)
-{
- int ret = -1;
- int i = 0;
-
- GF_ASSERT (child_up);
-
- for (i = 0; i < child_count; i++) {
- if (child_up[i]) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
+/*
+ * afr_local_init: prepare a frame-local for a new fop.
+ * Sets op_ret/op_errno to -1/EUCLEAN, initialises the sync barrier,
+ * snapshots priv->child_up into local->child_up, derives call_count from
+ * that snapshot, copies the current event generation and allocates the
+ * per-child read_attempted, readable and replies arrays.
+ *
+ * Returns 0 on success, -1 on failure. On failure *op_errno (when
+ * op_errno is non-NULL) is set to ENOMEM for an allocation failure or
+ * ENOTCONN when no subvolume is up.
+ * NOTE(review): arrays allocated before a later failure are not released
+ * here; presumably the caller cleans up the local -- confirm.
+ */
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
{
local->op_ret = -1;
local->op_errno = EUCLEAN;
- local->call_count = afr_up_children_count (priv->child_up,
- priv->child_count);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- return -ENOTCONN;
- }
+ syncbarrier_init (&local->barrier);
- local->child_up = GF_CALLOC (sizeof (*local->child_up),
- priv->child_count,
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
- return -ENOMEM;
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
}
memcpy (local->child_up, priv->child_up,
sizeof (*local->child_up) * priv->child_count);
-
- return 0;
+ /* count from the private snapshot so call_count matches local->child_up */
+ local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+ local->event_generation = priv->event_generation;
+
+ local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ return 0;
+out:
+ return -1;
}
int
@@ -3604,16 +3388,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -3633,10 +3407,62 @@ out:
return ret;
}
+/*
+ * afr_matrix_cleanup: release a matrix of m rows, i.e. each row buffer
+ * and then the array of row pointers. A NULL matrix is a no-op.
+ */
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+        int row = 0;
+
+        /* nothing was allocated, so there is nothing to release */
+        if (!matrix)
+                return;
+
+        /* free every row before the pointer array that holds them */
+        while (row < m) {
+                GF_FREE (matrix[row]);
+                row++;
+        }
+
+        GF_FREE (matrix);
+}
+
+/*
+ * afr_matrix_create: allocate an m x n matrix of int32_t as an array of
+ * m row pointers, each pointing at its own n-element row.
+ *
+ * Returns the matrix, or NULL if any allocation fails; in that case
+ * everything allocated so far is handed to afr_matrix_cleanup().
+ * NOTE(review): the failure path assumes GF_CALLOC zero-fills (so rows
+ * not yet allocated are NULL) and GF_FREE accepts NULL -- confirm.
+ */
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+        int32_t      **matrix = NULL;
+        unsigned int   i      = 0;   /* unsigned, to match the bound m */
+
+        /* element count first, element size second */
+        matrix = GF_CALLOC (m, sizeof (*matrix), gf_afr_mt_int32_t);
+        if (!matrix)
+                goto out;
+
+        for (i = 0; i < m; i++) {
+                matrix[i] = GF_CALLOC (n, sizeof (*matrix[i]),
+                                       gf_afr_mt_int32_t);
+                if (!matrix[i])
+                        goto out;
+        }
+        return matrix;
+out:
+        afr_matrix_cleanup (matrix, m);
+        return NULL;
+}
+
+/*
+ * afr_inodelk_init: bind an inodelk descriptor to lock domain 'dom' and
+ * allocate its per-child locked_nodes array (child_count entries).
+ * The domain pointer is stored as-is, not copied.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+        int ret = -ENOMEM;
+
+        lk->domain = dom;
+        /* element count first, element size second */
+        lk->locked_nodes = GF_CALLOC (child_count,
+                                      sizeof (*lk->locked_nodes),
+                                      gf_afr_mt_char);
+        if (NULL == lk->locked_nodes)
+                goto out;
+        ret = 0;
+out:
+        return ret;
+}
+
int
afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
- int i = 0;
int child_up_count = 0;
int ret = -ENOMEM;
afr_private_t *priv = NULL;
@@ -3647,20 +3473,20 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
- child_up_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ child_up_count = AFR_COUNT (local->child_up, priv->child_count);
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
- local->first_up_child = afr_first_up_child (local->child_up,
- priv->child_count);
-
- local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!local->child_errno)
- goto out;
+ local->pre_op_compat = priv->pre_op_compat;
local->transaction.eager_lock =
GF_CALLOC (sizeof (*local->transaction.eager_lock),
@@ -3670,167 +3496,130 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.eager_lock)
goto out;
- local->pending = GF_CALLOC (sizeof (*local->pending),
- priv->child_count,
- gf_afr_mt_int32_t);
-
- if (!local->pending)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
- }
-
local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
priv->child_count,
gf_afr_mt_char);
if (!local->transaction.pre_op)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
- 3, /* data + metadata + entry */
- gf_afr_mt_int32_t);
- if (!local->pending[i])
- goto out;
- }
+ local->transaction.fop_subvols = GF_CALLOC (sizeof (*local->transaction.fop_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.fop_subvols)
+ goto out;
- local->transaction.child_errno =
- GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- local->transaction.erase_pending = 1;
+ local->transaction.failed_subvols = GF_CALLOC (sizeof (*local->transaction.failed_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
out:
return ret;
}
+
+/*
+ * afr_set_low_priority: tag the frame's root with LOW_PRIO_PROC_PID.
+ * (The removed lines belonged to afr_reset_children, which this change
+ * drops.)
+ */
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count)
+afr_set_low_priority (call_frame_t *frame)
{
- unsigned int i = 0;
- for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ frame->root->pid = LOW_PRIO_PROC_PID;
}
-int32_t*
-afr_children_create (int32_t child_count)
+
+/*
+ * afr_have_quorum: report whether quorum is currently met.
+ * With an explicit quorum_count the test is
+ * up_count >= down_count + quorum_count. With AFR_QUORUM_AUTO it is a
+ * strict majority of child_count, or exactly half when child_count is
+ * even and the first child is up. Returns _gf_false otherwise.
+ * NOTE(review): GF_VALIDATE_OR_GOTO presumably jumps to "out" when priv
+ * is NULL, which also yields _gf_false -- confirm.
+ */
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
{
- int32_t *children = NULL;
- int i = 0;
+ unsigned int quorum = 0;
- GF_ASSERT (child_count > 0);
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
- children = GF_CALLOC (child_count, sizeof (*children),
- gf_afr_mt_int32_t);
- if (NULL == children)
- goto out;
- for (i = 0; i < child_count; i++)
- children[i] = -1;
-out:
- return children;
-}
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
-void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count)
-{
- gf_boolean_t child_found = _gf_false;
- int i = 0;
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- child_found = _gf_true;
- break;
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
}
}
- if (!child_found) {
- GF_ASSERT (i < child_count);
- children[i] = child;
- }
+out:
+ return _gf_false;
}
+/*
+ * afr_priv_destroy: tear down the translator's private structure. Drops
+ * the root inode reference, frees last_event, every pending_key string
+ * and the pending_key array, the children and child_up arrays, destroys
+ * the lock and finally frees priv itself. A NULL priv is a no-op.
+ */
void
-afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+afr_priv_destroy (afr_private_t *priv)
{
- int i = 0;
+ int i = 0;
- GF_ASSERT ((child >= 0) && (child < child_count));
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- if (i != (child_count - 1))
- memmove (children + i, children + i + 1,
- sizeof (*children)*(child_count - i - 1));
- children[child_count - 1] = -1;
- break;
- }
- }
+ if (!priv)
+ goto out;
+ inode_unref (priv->root_inode);
+ GF_FREE (priv->last_event);
+ /* free the per-child key strings before the array that holds them */
+ if (priv->pending_key) {
+ for (i = 0; i < priv->child_count; i++)
+ GF_FREE (priv->pending_key[i]);
+ }
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->children);
+ GF_FREE (priv->child_up);
+ LOCK_DESTROY (&priv->lock);
+
+ GF_FREE (priv);
+out:
+ return;
}
+/*
+ * xlator_subvolume_count: return the number of entries in the
+ * this->children list, counted by walking it to the end.
+ */
int
-afr_get_children_count (int32_t *children, unsigned int child_count)
+xlator_subvolume_count (xlator_t *this)
{
- int count = 0;
int i = 0;
+ xlator_list_t *list = NULL;
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- count++;
- }
- return count;
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
}
+
+/*
+ * afr_handle_open_fd_count: copy local->open_fd_count from the frame's
+ * local into the fd context of local->fd. Does nothing when the local
+ * has no fd or the fd has no context.
+ */
void
-afr_set_low_priority (call_frame_t *frame)
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
{
- frame->root->pid = LOW_PRIO_PROC_PID;
-}
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wbflags)
-{
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
+ local = frame->local;
- GF_ASSERT (fd && fd->inode);
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
- goto out;
- }
+ if (!local->fd)
+ return;
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx)
+ return;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child] = AFR_FD_OPENED;
- if (!IA_ISDIR (fd->inode->ia_type)) {
- fd_ctx->flags = flags;
- fd_ctx->wbflags = wbflags;
- }
- ret = 0;
-out:
- return ret;
+ fd_ctx->open_fd_count = local->open_fd_count;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f2e6760cf..fa1da3958 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,701 +37,384 @@
#include "checksum.h"
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
- gf_boolean_t activate_check = _gf_false;
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
- if (_gf_false == activate_check) {
- cksum = checksum[i];
- activate_check = _gf_true;
- continue;
- }
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
-
-int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- char *reason = NULL;
- int child_index = 0;
- uint32_t entry_cksum = 0;
- int call_count = 0;
- off_t last_offset = 0;
- inode_t *inode = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do opendir on %s",
- local->loc.path, priv->children[child_index]->name);
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: no entries found in %s",
- local->loc.path, priv->children[child_index]->name);
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- }
-
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
-
- return 0;
-
-out:
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->do_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
-
- reason = "checksums of directory differ";
- afr_launch_self_heal (frame, this, inode, _gf_false,
- inode->ia_type, reason, NULL,
- afr_examine_dir_sh_unwind);
- } else {
- afr_set_opendir_done (this, inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+#include "afr-transaction.h"
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
int call_count = -1;
int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
child_index = (long) cookie;
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
LOCK (&frame->lock);
{
- if (op_ret >= 0) {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- 0, 0);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
-
- local->op_errno = op_errno;
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (local->op_ret != 0)
- goto out;
-
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- /* do the unwind */
- goto out;
- }
- }
-
- return 0;
-
-out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
+ if (call_count == 0)
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
return 0;
}
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+int
+afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- int child_count = 0;
int i = 0;
- int ret = -1;
int call_count = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
- child_count = priv->child_count;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
loc_copy (&local->loc, loc);
- frame->local = local;
local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
call_count = local->call_count;
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_opendir_cbk,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- loc, fd);
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
-
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
-struct entry_name {
- char *name;
- struct list_head list;
-};
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static uint64_t
+afr_bits_for (uint64_t num)
{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
+ uint64_t bits = 0, ctrl = 1;
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
-out:
- return ret;
+ return bits;
}
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+int
+afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p)
{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
+ conf = this->private;
+ if (!conf)
+ goto out;
+ max = conf->child_count;
+ cnt = subvol;
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ max_bits = afr_bits_for (max);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
}
- return offset;
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
}
-static void
-afr_forget_entries (fd_t *fd)
+int
+afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p,
+ uint64_t *x_p)
{
- struct entry_name *entry = NULL;
- struct entry_name *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
-}
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ int subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ if (!this->private)
+ return -1;
+ conf = this->private;
+ max = conf->child_count;
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_local_t * local = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
- local = frame->local;
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = afr_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
- if (op_ret == -1)
- goto out;
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
}
out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
+ subvol = cnt;
+
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ if (x_p)
+ *x_p = x;
return 0;
}
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+static void
+afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
+ gf_dirent_t *entries, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
+ afr_private_t *priv = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int gen = 0;
+
+ priv = THIS->private;
+
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ continue;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init (&entry->list);
+ afr_itransform (THIS, subvol, entry->d_off, &entry->d_off);
+ list_add_tail (&entry->list, &entries->list);
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
+ if (entry->inode) {
+ gen = 0;
+ afr_inode_read_subvol_get (entry->inode, THIS,
+ data_readable,
+ metadata_readable, &gen);
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
+ if (gen != priv->event_generation ||
+ !data_readable[subvol] ||
+ !metadata_readable[subvol]) {
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
+ inode_unref (entry->inode);
+ entry->inode = NULL;
+ }
+ }
}
-
-out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
- return 0;
}
+
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ INIT_LIST_HEAD (&entries.list);
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = frame->local;
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ if (op_ret >= 0)
+ afr_readdir_transform_entries (subvol_entries, (long) cookie,
+ &entries, local->fd);
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata);
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ return 0;
+}
- fd_ctx->last_tried = call_child;
- }
- if (whichop == GF_FOP_READDIR)
+int
+afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, 0, 0);
+ return 0;
+ }
+
+ if (local->op == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset);
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset);
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
+}
- op_ret = 0;
+
+int
+afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = whichop;
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict)? dict_ref (dict) : NULL;
+
+ if (offset == 0) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_deitransform (this, offset, &subvol,
+ (uint64_t *)&local->cont.readdir.offset);
+ afr_readdir_wind (frame, this, subvol);
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+
return 0;
}
@@ -748,7 +422,6 @@ afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int32_t
afr_releasedir (xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
afr_cleanup_fd_ctx (this, fd);
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6a6bc6354..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,23 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 2929ad741..465dde54f 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -43,72 +34,218 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "byte-order.h"
#include "afr.h"
#include "afr-transaction.h"
-
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this);
+
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
- char *tmp = NULL;
+ int ret = -1;
+ char *child_path = NULL;
if (!child->parent) {
- //this should never be called with root as the child
- GF_ASSERT (0);
- loc_copy (parent, child);
- return;
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ parent->path = gf_strdup (dirname (child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+ ret = 0;
+out:
+ GF_FREE (child_path);
- if (!uuid_is_null (child->pargfid))
- uuid_copy (parent->gfid, child->pargfid);
+ return ret;
}
-/* {{{ create */
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ afr_replies_interpret (frame, this, local->inode);
+ inode_read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get (local->parent, this,
+ NULL, NULL);
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
+ NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_read_subvol_reset (local->inode,
+ this);
+ if (local->parent)
+ afr_inode_read_subvol_reset (local->parent,
+ this);
+ if (local->parent2)
+ afr_inode_read_subvol_reset (local->parent2,
+ this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ if (local->replies[i].xdata)
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ }
+ }
+}
+
+
+static void
+__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
+}
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
+
+static int
+__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ __afr_dir_write_fill (frame, this, child_index, op_ret,
+ op_errno, buf, preparent, postparent,
+ preparent2, postparent2, xdata);
+ }
+ UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_dir_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
+ afr_mark_entry_pending_changelog (frame, this);
+
+ local->transaction.resume (frame, this);
}
return 0;
@@ -116,224 +253,269 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
+ int call_count = 0;
- local = frame->local;
- priv = this->private;
+ call_count = afr_frame_return (frame);
- child_index = (long) cookie;
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ return 0;
+}
- if (op_ret != -1) {
- local->op_ret = op_ret;
- ret = afr_fd_ctx_set (this, fd);
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int idx = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
+ local = frame->local;
+ priv = this->private;
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
- ret = fd_ctx_get (fd, this, &ctx);
+ new_local = AFR_FRAME_INIT (new_frame, op_errno);
+ if (!new_local)
+ goto out;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ xattr = dict_new ();
+ if (!xattr)
+ goto out;
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- fd_ctx->flags = local->cont.create.flags;
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
+ pending = alloca0 (priv->child_count);
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count ++;
+ continue;
+ }
- local->cont.create.inode = inode;
+ changelog[i][idx] = hton32(1);
+ pending[i] = 1;
+ }
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->inode);
- local->op_errno = op_errno;
- }
-unlock:
- UNLOCK (&frame->lock);
+ afr_set_pending_dict (priv, xattr, changelog);
- call_count = afr_frame_return (frame);
+ new_local->call_count = call_count;
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
- local->transaction.resume (frame, this);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr, NULL);
+ if (!--call_count)
+ break;
}
- return 0;
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return;
}
-int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+void
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ int pre_op_count = 0;
+ int failed_count = 0;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ if (local->op_ret < 0)
+ return;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD)
+ return;
- local->call_count = call_count;
+ pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT (local->transaction.failed_subvols,
+ priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd,
- local->cont.create.params);
- if (!--call_count)
- break;
- }
- }
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- return 0;
+ afr_mark_new_entry_changelog (frame, this);
+
+ return;
}
+/* {{{ create */
+
int
-afr_create_done (call_frame_t *frame, xlator_t *this)
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
local = frame->local;
- local->transaction.unwind (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame);
- AFR_STACK_DESTROY (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int
+afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create,
+ &local->loc, local->cont.create.flags,
+ local->cont.create.mode, local->umask,
+ local->cont.create.fd, local->xdata_req);
+ return 0;
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+int
+afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ priv = this->private;
+
+ QUORUM_CHECK(create,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
- if (params)
- local->cont.create.params = dict_ref (params);
+ local->umask = umask;
- if (loc->parent)
- local->cont.create.parent_ino = loc->parent->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_create_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -346,33 +528,17 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -381,184 +547,107 @@ int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->cont.mknod.params);
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev, local->umask,
+ local->xdata_req);
return 0;
}
-
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(mknod,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
- if (params)
- local->cont.mknod.params = dict_ref (params);
+ local->umask = umask;
- if (loc->parent)
- local->cont.mknod.parent_ino = loc->parent->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mknod_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -572,33 +661,17 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -607,185 +680,106 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask,
+ local->xdata_req);
return 0;
}
int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(mkdir,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.mkdir.mode = mode;
- if (params)
- local->cont.mkdir.params = dict_ref (params);
+ local->umask = umask;
- if (loc->parent)
- local->cont.mkdir.parent_ino = loc->parent->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mkdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -799,33 +793,17 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -834,183 +812,105 @@ int
afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(link,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.link.ino = oldloc->inode->ino;
+ if (!local->xdata_req)
+ goto out;
- if (oldloc->parent)
- local->cont.link.parent_ino = newloc->parent->ino;
+ local->op = GF_FOP_LINK;
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
+ local->transaction.wind = afr_link_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_link_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1024,33 +924,17 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1059,185 +943,106 @@ int
afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->cont.symlink.params);
-
- if (!--call_count)
- break;
-
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc,
+ local->umask, local->xdata_req);
return 0;
}
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(symlink,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.symlink.linkpath = gf_strdup (linkpath);
- if (params)
- local->cont.symlink.params = dict_ref (params);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (loc->parent)
- local->cont.symlink.parent_ino = loc->parent->ino;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_symlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1250,35 +1055,19 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
return 0;
}
@@ -1287,175 +1076,135 @@ int
afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
+ /*
+ * afr_rename: entry point for the rename fop.
+ *
+ * Copies the caller's frame into a transaction frame, records oldloc/newloc
+ * and the request xdata in the transaction-local state, registers entry
+ * lockees for the new parent, the old parent and (when the destination is
+ * an existing directory) the destination itself, sorts the lockees with
+ * afr_entry_lockee_cmp and starts an AFR_ENTRY_RENAME_TRANSACTION.
+ *
+ * Always returns 0. On any setup failure the transaction frame (if one was
+ * created) is destroyed and the caller's frame is unwound with -1/op_errno.
+ */
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ QUORUM_CHECK(rename,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ /* No frame to run the transaction on: bail out like the sibling
+ * entry fops do instead of handing NULL to AFR_FRAME_INIT. */
+ op_errno = ENOMEM;
+ goto out;
+ }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (oldloc->parent);
+ local->parent2 = inode_ref (newloc->parent);
- local->cont.rename.ino = oldloc->inode->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (oldloc->parent)
- local->cont.rename.oldparent_ino = oldloc->parent->ino;
- if (newloc->parent)
- local->cont.rename.newparent_ino = newloc->parent->ino;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rename_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ nlockee++;
}
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1471,22 +1220,13 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1494,162 +1234,103 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
int
afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink,
+ &local->loc, local->xflag, local->xdata_req);
return 0;
}
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+int
+afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
+ QUORUM_CHECK(unlink,out);
+
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
- transaction_frame->local = local;
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (loc->parent)
- local->cont.unlink.parent_ino = loc->parent->ino;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_unlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1667,22 +1348,13 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1690,164 +1362,117 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
int
afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags, local->xdata_req);
return 0;
}
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
priv = this->private;
+ QUORUM_CHECK(rmdir,out);
+
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.rmdir.flags = flags;
- loc_copy (&local->loc, loc);
- if (loc->parent)
- local->cont.rmdir.parent_ino = loc->parent->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rmdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 0290c6350..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -23,38 +14,34 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 1258afe09..01e078c13 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,244 +35,153 @@
#include "compat-errno.h"
#include "compat.h"
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
+#include "afr-transaction.h"
+
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.access.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+int
+afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
-
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (access, frame, local->op_ret,
+ local->op_errno, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access,
+ &local->loc, local->cont.access.mask,
+ local->xdata_req);
+ return 0;
+}
+int
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int mask, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.access.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ local->op = GF_FOP_ACCESS;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->access,
- loc, mask);
+ afr_read_txn (frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
return 0;
}
-
/* }}} */
/* {{{ stat */
-int32_t
+int
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- read_child = (long) cookie;
+ afr_local_t *local = NULL;
local = frame->local;
- if (op_ret == -1) {
- last_index = &local->cont.stat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- unwind = 0;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->stat,
- &local->loc);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
- }
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+int
+afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->stat,
+ &local->loc, local->xdata_req);
+ return 0;
+}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+int
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.stat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- loc_copy (&local->loc, loc);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.stat.ino = loc->inode->ino;
+ local->op = GF_FOP_STAT;
+ loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc);
+ afr_read_txn (frame, this, loc->inode, afr_stat_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -291,127 +191,76 @@ out:
/* {{{ fstat */
-int32_t
+int
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.fstat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fstat,
- local->fd);
- }
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
- }
- return 0;
+int
+afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fstat,
+ local->fd, local->xdata_req);
+ return 0;
}
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- children = priv->children;
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- VALIDATE_OR_GOTO (fd->inode, out);
+ afr_fix_open (fd, this);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
-
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
-
-
-
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.fstat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- local->cont.fstat.ino = fd->inode->ino;
- local->fd = fd_ref (fd);
-
- op_ret = afr_open_fd_fix (frame, this, _gf_false);
- if (op_ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd);
+ afr_read_txn (frame, this, fd->inode, afr_fstat_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -420,118 +269,77 @@ out:
/* {{{ readlink */
-int32_t
+int
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
- local = frame->local;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readlink.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readlink,
- &local->loc,
- local->cont.readlink.size);
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
- }
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno,
+ buf, sbuf, xdata);
+ return 0;
+}
- return 0;
+int
+afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readlink, frame, local->op_ret,
+ local->op_errno, 0, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink,
+ &local->loc, local->cont.readlink.size,
+ local->xdata_req);
+ return 0;
}
-int32_t
+int
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
+ afr_local_t * local = NULL;
int32_t op_errno = 0;
- int32_t read_child = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readlink.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_READLINK;
loc_copy (&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->cont.readlink.size = size;
- local->cont.readlink.ino = loc->inode->ino;
-
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readlink,
- loc, size);
+ afr_read_txn (frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+
+ return 0;
}
@@ -545,7 +353,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -557,18 +365,19 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
void
-__filter_xattrs (dict_t *dict)
+afr_filter_xattrs (dict_t *dict)
{
struct list_head keys = {0,};
struct _xattr_key *key = NULL;
@@ -589,85 +398,669 @@ __filter_xattrs (dict_t *dict)
}
-
-int32_t
+int
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
+
+ if (dict)
+ afr_filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+
+/*
+ * Issue getxattr on one child, or give up when no child is left.
+ *
+ * subvol == -1: unwind getxattr with the op_ret/op_errno saved in
+ * frame->local and NULL dict/xdata.
+ * otherwise:    wind getxattr to priv->children[subvol]; the child index
+ * travels as the cookie and afr_getxattr_cbk receives the reply.
+ *
+ * NOTE(review): same shape as afr_access_wind()/afr_stat_wind(), which are
+ * handed to afr_read_txn(); presumably this one is used the same way.
+ *
+ * Always returns 0.
+ */
+int
+afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+
+        if (subvol != -1) {
+                STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+                                   (void *) (long) subvol,
+                                   priv->children[subvol],
+                                   priv->children[subvol]->fops->getxattr,
+                                   &local->loc, local->cont.getxattr.name,
+                                   local->xdata_req);
+                return 0;
+        }
+
+        /* No child to ask: report whatever the last attempt left behind. */
+        AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+                          local->op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Thin wrapper: unwind a getxattr frame with the given result, reply
+ * dict and xdata.  Always returns 0.
+ */
+int32_t
+afr_getxattr_unwind (call_frame_t *frame,
+                     int op_ret, int op_errno,
+                     dict_t *dict, dict_t *xdata)
+{
+        AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+        return 0;
+}
+
+int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
priv = this->private;
children = priv->children;
local = frame->local;
+ cky = (long) cookie;
- read_child = (long) cookie;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ op_errno = afr_final_errno (local, priv);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/*
+ * Reply callback for a getxattr that is outstanding on several children
+ * (one invocation per reply; local->call_count tracks what is pending).
+ *
+ * Per reply, under frame->lock:
+ *  - decrement local->call_count;
+ *  - on op_ret == -1, store op_errno in local->replies[<child>].op_errno;
+ *  - look up local->cont.getxattr.name in the reply dict and store a copy
+ *    of that string in local->dict, keyed by the replying child's name.
+ *
+ * On the last reply: the values collected in local->dict are serialized
+ * into lk_summary with '\n' as delimiter, placed in a fresh dict under
+ * local->cont.getxattr.name, and the getxattr frame is unwound.
+ *
+ * cookie is the index of the replying child in priv->children.
+ * Returns the result of the last dict operation performed (ret).
+ *
+ * NOTE(review): the op_ret/xdata passed to the unwind are those of the
+ * reply that happened to arrive last - confirm this is intended.
+ */
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ /* NOTE(review): the lookup also runs for failed replies
+ * (op_ret == -1), where dict may be NULL - verify that
+ * dict_get_str() tolerates that. */
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ /* NOTE(review): gf_strdup() result is not checked before
+ * being handed to dict_set_dynstr() - TODO confirm. */
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ /* Only the invocation that brought call_count to zero unwinds. */
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ /* NOTE(review): lk_summary is a fixed 1024-byte buffer and no
+ * size is passed to the serializer - confirm the output is
+ * bounded.  local->dict may also still be NULL here if every
+ * dict_new() above failed. */
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ /* Every path reaches this assignment, so the ENOMEM values set
+ * on the failure paths above are replaced by whatever
+ * afr_final_errno() returns. */
+ op_errno = afr_final_errno (local, priv);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/**
+ * node-uuid cbk uses next child querying mechanism
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all childs and yet no success; give up !
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name,
+ NULL);
}
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
}
+ dict_unref (lockinfo);
+
return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
-
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
return 0;
}
+/*
+ * Reply callback for an fgetxattr that is outstanding on several
+ * children; aggregates the per-child values of local->cont.getxattr.name
+ * into a single string.
+ *
+ * Per reply, under frame->lock:
+ *  - decrement local->call_count;
+ *  - failed reply: remember op_errno in local->op_errno;
+ *    successful reply: remember op_ret and keep a ref on the first xdata;
+ *  - copy the reply's value into local->dict under "<name>-<child index>"
+ *    and add its length (+1) to local->cont.getxattr.xattr_len.
+ *
+ * On the last reply the collected values are serialized, space separated,
+ * into "(<" AFR_PATHINFO_HEADER "<xlator name>> <values>)", stored in a
+ * fresh dict under local->cont.getxattr.name, and the frame is unwound
+ * with local->op_ret / local->op_errno / local->xdata_rsp.
+ *
+ * cookie is the index of the replying child.
+ * Returns the result of the last dict operation, or -1 when called with
+ * a NULL frame, frame->local or this.
+ */
int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        afr_local_t *local           = NULL;
+        int32_t      callcnt         = 0;
+        int          ret             = 0;
+        char        *xattr           = NULL;
+        char        *xattr_serz      = NULL;
+        char         xattr_cky[1024] = {0,};
+        dict_t      *nxattr          = NULL;
+        long         cky             = 0;
+        int32_t      padding         = 0;
+        int32_t      tlen            = 0;
+
+        /*
+         * Return directly: frame->lock has not been taken yet and
+         * frame/local may be NULL, so neither the unlock nor the unwind
+         * path below may be entered from here.
+         */
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+                return -1;
+        }
+
+        local = frame->local;
+        cky = (long) cookie;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret < 0) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = op_ret;
+                        if (!local->xdata_rsp && xdata)
+                                local->xdata_rsp = dict_ref (xdata);
+                }
+
+                if (!dict || (op_ret < 0))
+                        goto unlock;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+
+                if (local->dict) {
+                        ret = dict_get_str (dict,
+                                            local->cont.getxattr.name,
+                                            &xattr);
+                        if (ret)
+                                goto unlock;
+
+                        /* private copy; owned by local->dict once set */
+                        xattr = gf_strdup (xattr);
+                        if (!xattr) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Out of memory copying xattr value");
+                                ret = -1;
+                                goto unlock;
+                        }
+
+                        (void) snprintf (xattr_cky, sizeof (xattr_cky),
+                                         "%s-%ld",
+                                         local->cont.getxattr.name, cky);
+                        ret = dict_set_dynstr (local->dict,
+                                               xattr_cky, xattr);
+                        if (ret) {
+                                /* NOTE(review): whether xattr is released
+                                 * by dict_set_dynstr() on failure is not
+                                 * visible here - confirm. */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Cannot set xattr cookie key");
+                                goto unlock;
+                        }
+
+                        local->cont.getxattr.xattr_len
+                                += strlen (xattr) + 1;
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        /* Only the invocation that brought call_count to zero unwinds. */
+        if (!callcnt) {
+                if (!local->cont.getxattr.xattr_len)
+                        goto unwind;
+
+                nxattr = dict_new ();
+                if (!nxattr)
+                        goto unwind;
+
+                /* extra bytes for decorations (brackets and <>'s) */
+                padding += strlen (this->name)
+                        + strlen (AFR_PATHINFO_HEADER) + 4;
+                local->cont.getxattr.xattr_len += (padding + 2);
+
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+                                        sizeof (char), gf_common_mt_char);
+
+                if (!xattr_serz)
+                        goto unwind;
+
+                /* the xlator info; exactly `padding` characters long */
+                (void) snprintf (xattr_serz, local->cont.getxattr.xattr_len,
+                                 "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+
+                /* actual series of pathinfo */
+                ret = dict_serialize_value_with_delim (local->dict,
+                                                       xattr_serz
+                                                       + strlen (xattr_serz),
+                                                       &tlen, ' ');
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+                                " dictionary");
+                        /* not handed to any dict yet: release it here */
+                        GF_FREE (xattr_serz);
+                        xattr_serz = NULL;
+                        goto unwind;
+                }
+
+                /* closing part */
+                xattr_serz[padding + tlen] = ')';
+                xattr_serz[padding + tlen + 1] = '\0';
+
+                ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+                                       xattr_serz);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+                                " key in dict");
+
+        unwind:
+                AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+                                  local->op_errno, nxattr, local->xdata_rsp);
+
+                if (nxattr)
+                        dict_unref (nxattr);
+        }
+
+        return ret;
+}
+
+int32_t
afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- char pathinfo_cky[1024] = {0,};
- dict_t *xattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
goto out;
}
@@ -678,6 +1071,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
if (!dict || (op_ret < 0))
goto out;
@@ -685,142 +1086,341 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
local->dict = dict_new ();
if (local->dict) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
if (ret)
goto out;
- pathinfo = gf_strdup (pathinfo);
+ xattr = gf_strdup (xattr);
- snprintf (pathinfo_cky, 1024, "%s-%ld", GF_XATTR_PATHINFO_KEY, cky);
- ret = dict_set_dynstr (local->dict, pathinfo_cky, pathinfo);
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo cookie key");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
goto out;
}
- local->cont.getxattr.pathinfo_len += strlen (pathinfo) + 1;
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
}
}
out:
UNLOCK (&frame->lock);
if (!callcnt) {
- if (!local->cont.getxattr.pathinfo_len)
+ if (!local->cont.getxattr.xattr_len)
goto unwind;
- xattr = dict_new ();
- if (!xattr)
+ nxattr = dict_new ();
+ if (!nxattr)
goto unwind;
/* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.pathinfo_len += (padding + 2);
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- pathinfo_serz = GF_CALLOC (local->cont.getxattr.pathinfo_len, sizeof (char),
- gf_common_mt_char);
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
- if (!pathinfo_serz)
+ if (!xattr_serz)
goto unwind;
/* the xlator info */
- sprintf (pathinfo_serz, "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
/* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict, pathinfo_serz + strlen (pathinfo_serz),
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
&tlen, ' ');
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing dictionary");
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
goto unwind;
}
/* closing part */
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr (xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
-
- if (local->dict)
- dict_unref (local->dict);
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
- if (xattr)
- dict_unref (xattr);
+ if (nxattr)
+ dict_unref (nxattr);
}
return ret;
}
+/*
+ * dict_foreach() visitor: keys matching GF_XATTR_STIME_PATTERN are handed
+ * to gf_get_max_stime() together with the opaque @data pointer; every other
+ * key is ignored.
+ *
+ * Returns 0 for a non-matching key, otherwise the return value of
+ * gf_get_max_stime().
+ * NOTE(review): presumably gf_get_max_stime() keeps the larger stime in
+ * @data -- confirm against its definition.
+ */
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+        /* not an stime key: nothing to aggregate */
+        if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) != 0)
+                return 0;
+
+        return gf_get_max_stime (THIS, data, key, value);
+}
+
+/*
+ * getxattr callback used when an stime xattr is requested from several
+ * subvolumes.
+ *
+ * Under frame->lock every reply decrements local->call_count.  A reply that
+ * failed (op_ret < 0) or carries no dict only records op_errno.  Otherwise
+ * the reply is either copied into local->dict (when local->dict is not set
+ * yet) or folded into it via afr_aggregate_stime_xattr(), and local->op_ret
+ * is set to 0.  Once the count reaches zero the getxattr is unwound with
+ * local->dict and the xdata of this (last) reply.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+                               dict_t *dict, dict_t *xdata)
+{
+        afr_local_t *local   = NULL;
+        int32_t      pending = 0;
+
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+                return 0;
+        }
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (!dict || (op_ret < 0)) {
+                        /* failed or empty reply: remember the errno only */
+                        local->op_errno = op_errno;
+                } else {
+                        if (local->dict)
+                                dict_foreach (dict, afr_aggregate_stime_xattr,
+                                              local->dict);
+                        else
+                                local->dict = dict_copy_with_ref (dict, NULL);
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        /* last outstanding reply: hand the aggregate back to the caller */
+        if (pending == 0) {
+                AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+                                  local->op_errno, local->dict, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Tell whether @name is one of the xattrs that get a dedicated callback and,
+ * if it is, store that callback in *@cbk.
+ *
+ * Recognised names:
+ *  - exactly GF_XATTR_PATHINFO_KEY or GF_XATTR_USER_PATHINFO_KEY
+ *  - anything starting with GF_XATTR_CLRLK_CMD
+ *  - anything starting with GF_XATTR_LOCKINFO_KEY
+ *  - anything matching GF_XATTR_STIME_PATTERN
+ *
+ * @is_fgetxattr selects the fgetxattr flavour of the callback for the first
+ * three groups; the stime callback is the same for both fops.
+ *
+ * Returns _gf_true with *@cbk set on a match.  Returns _gf_false and leaves
+ * *@cbk untouched otherwise, including when @name or @cbk is NULL.
+ */
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+                      gf_boolean_t is_fgetxattr)
+{
+        GF_ASSERT (cbk);
+        if (!cbk || !name)
+                return _gf_false;
+
+        if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
+            !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
+                if (is_fgetxattr)
+                        *cbk = afr_fgetxattr_pathinfo_cbk;
+                else
+                        *cbk = afr_getxattr_pathinfo_cbk;
+                return _gf_true;
+        }
+
+        if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+                      strlen (GF_XATTR_CLRLK_CMD))) {
+                if (is_fgetxattr)
+                        *cbk = afr_fgetxattr_clrlk_cbk;
+                else
+                        *cbk = afr_getxattr_clrlk_cbk;
+                return _gf_true;
+        }
+
+        if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+                      strlen (GF_XATTR_LOCKINFO_KEY))) {
+                if (is_fgetxattr)
+                        *cbk = afr_fgetxattr_lockinfo_cbk;
+                else
+                        *cbk = afr_getxattr_lockinfo_cbk;
+                return _gf_true;
+        }
+
+        if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+                *cbk = afr_common_getxattr_stime_cbk;
+                return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Wind getxattr(@loc, @name) to every child marked up in local->child_up,
+ * using @cbk as the reply handler and the child index as the cookie.  No
+ * xdata is passed down (NULL).
+ *
+ * The number of winds is bounded by local->call_count as found on entry;
+ * the loop stops right after the wind that brings that budget to zero.
+ */
+static void
+afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+                          const char *name, loc_t *loc,
+                          fop_getxattr_cbk_t cbk)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *local     = frame->local;
+        int            remaining = 0;
+        int            child     = 0;
+
+        /* local->call_count set in afr_local_init */
+        remaining = local->call_count;
+
+        /* If up-children count is 0, afr_local_init would have failed
+         * already and the call would have unwound so not handling it here.
+         */
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->getxattr,
+                                   loc, name, NULL);
+
+                if (--remaining == 0)
+                        break;
+        }
+}
+
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
+
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ local->op = GF_FOP_GETXATTR;
+
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
goto out;
}
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ local->marker.call_count = priv->child_count;
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
+ "%s: failed to get marker attr (%s)",
loc->path, name);
- op_errno = ENODATA;
+ op_errno = EINVAL;
goto out;
}
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all children
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_all_subvols (this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
*(sub_volumes + i) = trav->xlator;
+
}
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
sub_volumes,
priv->child_count,
- MARKER_UUID_TYPE,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
-
gf_log (this->name, GF_LOG_INFO,
"%s: failed to get marker attr (%s)",
loc->path, name);
@@ -830,86 +1430,150 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ }
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0) {
-
- local->call_count = priv->child_count;
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_getxattr_pathinfo_cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name);
- }
+no_name:
- return 0;
- }
+ afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- local->marker.call_count = priv->child_count;
+/* {{{ fgetxattr */
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
- *(sub_volumes + i) = trav->xlator;
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- }
+ local = frame->local;
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- return 0;
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
+
+ if (dict)
+ afr_filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+/*
+ * Wind callback handed to afr_read_txn() for fgetxattr.
+ *
+ * @subvol is the index of the child to read from.  A value of -1 means no
+ * child is left to try, in which case the fop is unwound with the
+ * op_ret/op_errno already stored in frame->local.
+ *
+ * Otherwise fgetxattr is wound to that child with afr_fgetxattr_cbk() as
+ * the reply handler and the child index as the cookie; the callback reads
+ * the index back from the cookie when it asks the read transaction to
+ * continue after a failure.
+ *
+ * Always returns 0.
+ */
+int
+afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        local = frame->local;
+        priv = this->private;
+
+        if (subvol == -1) {
+                AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+                                  local->op_errno, NULL, NULL);
+                return 0;
+        }
+
+        /* STACK_WIND_COOKIE takes (frame, callback, cookie, ...): the
+         * callback comes before the cookie, as in the other read fops.
+         */
+        STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol,
+                           priv->children[subvol],
+                           priv->children[subvol]->fops->fgetxattr,
+                           local->fd, local->cont.getxattr.name,
+                           local->xdata_req);
+        return 0;
+}
+
+
+static void
+afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ NULL);
+ if (!--call_count)
+ break;
}
}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ return;
+}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
+
+int
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref (fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup (name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols (this, frame, cbk);
+ return 0;
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->getxattr,
- loc, name);
+ afr_fix_open (fd, this);
+
+ afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -918,146 +1582,84 @@ out:
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
-
-int32_t
+int
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t *fresh_children = NULL;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ local = frame->local;
- children = priv->children;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- local = frame->local;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readv.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset);
- }
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
- }
- return 0;
+/*
+ * Wind callback handed to afr_read_txn() for readv.
+ *
+ * With @subvol == -1 the fop is unwound using the op_ret/op_errno stored in
+ * frame->local and no payload.  Otherwise readv is wound to child @subvol
+ * with the fd, size, offset, flags and xdata saved in frame->local; the
+ * child index travels as the cookie for afr_readv_cbk().
+ *
+ * Always returns 0.
+ */
+int
+afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        if (subvol != -1) {
+                STACK_WIND_COOKIE (frame, afr_readv_cbk,
+                                   (void *) (long) subvol,
+                                   priv->children[subvol],
+                                   priv->children[subvol]->fops->readv,
+                                   local->fd, local->cont.readv.size,
+                                   local->cont.readv.offset,
+                                   local->cont.readv.flags,
+                                   local->xdata_req);
+        } else {
+                AFR_STACK_UNWIND (readv, frame, local->op_ret,
+                                  local->op_errno,
+                                  NULL, 0, NULL, NULL, NULL);
+        }
+
+        return 0;
}
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+int
+afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
- priv = this->private;
- children = priv->children;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readv.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- local->fd = fd_ref (fd);
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref (fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->cont.readv.ino = fd->inode->ino;
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
+ afr_fix_open (fd, this);
- op_ret = afr_open_fd_fix (frame, this, _gf_false);
- if (op_ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset);
+ afr_read_txn (frame, this, fd->inode, afr_readv_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 5479cfbd5..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 639b89b1f..3dacfc8dd 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,142 +37,303 @@
#include "afr.h"
#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
+//#include "afr-self-heal-common.h"
-/* {{{ writev */
-int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ if (local->transaction.type == AFR_METADATA_TRANSACTION)
+ read_subvol = afr_metadata_subvol_get (local->inode, this,
+ NULL, NULL);
+ else
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ afr_inode_read_subvol_reset (local->inode, this);
+ continue;
+ }
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+ }
+}
+
+
+/*
+ * Record one child's reply to an inode-write fop in
+ * local->replies[@child_index].
+ *
+ * The slot is marked valid and op_ret/op_errno are stored unconditionally.
+ * On failure (op_ret < 0) the child is reported through
+ * afr_transaction_fop_failed().  On success whichever of @prebuf and
+ * @postbuf is non-NULL is copied, and a reference is taken on @xdata if
+ * given.
+ *
+ * NOTE(review): the double-underscore name and the callers in this file
+ * suggest frame->lock is expected to be held -- confirm.
+ */
+static void
+__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+                        int op_ret, int op_errno,
+                        struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t *local = frame->local;
+
+        local->replies[child_index].valid = 1;
+        local->replies[child_index].op_ret = op_ret;
+        local->replies[child_index].op_errno = op_errno;
+
+        if (op_ret < 0) {
+                afr_transaction_fop_failed (frame, this, child_index);
+                return;
+        }
+
+        if (prebuf)
+                local->replies[child_index].prestat = *prebuf;
+        if (postbuf)
+                local->replies[child_index].poststat = *postbuf;
+        if (xdata)
+                local->replies[child_index].xdata = dict_ref (xdata);
+}
+
+
+static int
+__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
}
UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_inode_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
+ local->transaction.resume (frame, this);
}
+
return 0;
}
+/* {{{ writev */
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int read_child = 0;
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp);
+}
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
local = frame->local;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
+ fop_frame = afr_transaction_detach_fop_frame (frame);
- local->op_errno = op_errno;
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
}
- UNLOCK (&frame->lock);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
- local->transaction.resume (frame, this);
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
}
- return 0;
}
int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
+ LOCK (&frame->lock);
+ {
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if (ret == -1)
+ goto unlock;
+ if ((open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
}
+unlock:
+ UNLOCK (&frame->lock);
- local->call_count = call_count;
+ call_count = afr_frame_return (frame);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.iobref);
-
- if (!--call_count)
- break;
+ if (call_count == 0) {
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
+
+ afr_writev_handle_short_writes (frame, this);
+
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!afr_txn_nothing_failed (frame, this)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
}
}
-
return 0;
}
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev,
+ local->fd, local->cont.writev.vector,
+ local->cont.writev.count, local->cont.writev.offset,
+ local->cont.writev.flags, local->cont.writev.iobref,
+ local->xdata_req);
return 0;
}
@@ -191,30 +343,37 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
{
call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
+ int ret = -1;
+ int op_errno = ENOMEM;
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
+ local = frame->local;
transaction_frame->local = local;
- frame->local = NULL;
+ frame->local = NULL;
- local->op = GF_FOP_WRITE;
+ if (!AFR_FRAME_INIT (frame, op_errno))
+ goto out;
- local->success_count = 0;
+ local->op = GF_FOP_WRITE;
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
+
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -223,1498 +382,1373 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
+
+int
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
- if ((!fd) || (!fd->inode))
- return -1;
+ priv = this->private;
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
+ QUORUM_CHECK(writev,out);
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
+ local->cont.writev.vector = iov_dup (vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref (iobref);
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
+ if (!local->xdata_req)
+ goto out;
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- return 0;
-}
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local = frame->local;
- GF_ASSERT (local->fop_call_continue);
+ /* Set append_write to be true speculatively. If on any
+ server it turns out not to be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- return paused_call;
-}
+ afr_fix_open (fd, this);
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
+ afr_do_writev (frame, this);
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
+ return 0;
+out:
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return ret;
+ return 0;
}
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+/* }}} */
- reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-}
+/* {{{ truncate */
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
+afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
- GF_ASSERT (local->fd);
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
- goto out;
- }
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
- goto unlock;
- }
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->up_count < priv->up_count) {
- need_self_heal = _gf_true;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
- }
+int
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
+ local = frame->local;
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
- }
- }
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
- goto out;
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
- if (!need_open_count)
- goto out;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
-out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+/*
+ * Wind the truncate fop to the child at index 'subvol', using the loc,
+ * offset and request xdata stored in frame->local. The child index is
+ * passed as the cookie to afr_truncate_wind_cbk. Always returns 0.
+ */
+int
+afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate,
+ &local->loc, local->cont.truncate.offset,
+ local->xdata_req);
+ return 0;
}
+
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(truncate,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
- local->cont.writev.iobref = iobref_ref (iobref);
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_truncate_unwind;
+
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_TRUNCATE;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
+
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
-/* {{{ truncate */
+/* {{{ ftruncate */
+
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
- }
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
+
+
+/*
+ * Callback for an ftruncate wound to one child. When the call succeeded
+ * and the file size actually changed (prebuf vs. postbuf ia_size), clear
+ * local->stable_write, which afr_ftruncate sets speculatively. All
+ * arguments are then handed to __afr_inode_write_cbk, whose return value
+ * is returned.
+ */
+int
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ /* prebuf/postbuf are only dereferenced when op_ret == 0 */
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
+
+
+/*
+ * Wind the ftruncate fop to the child at index 'subvol', using the fd,
+ * offset and request xdata stored in frame->local. The child index is
+ * passed as the cookie to afr_ftruncate_wind_cbk. Always returns 0.
+ */
+int
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate,
+ local->fd, local->cont.ftruncate.offset,
+ local->xdata_req);
return 0;
}
+/*
+ * Entry point for the ftruncate fop.
+ *
+ * After the quorum check, copies 'frame' into a transaction frame,
+ * initialises the AFR local on that copy, records the offset, request
+ * xdata, fd and inode, installs the ftruncate wind/unwind handlers and
+ * starts an AFR_DATA_TRANSACTION starting at 'offset' with len 0.
+ *
+ * On any failure the transaction frame (if it was created) is destroyed
+ * and 'frame' is unwound with -1 and op_errno (ENOMEM by default, or the
+ * negated return value of afr_transaction). Always returns 0.
+ */
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+ QUORUM_CHECK(ftruncate,out);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ transaction_frame = copy_frame (frame);
+ /* Check the copy, not the caller's frame: a failed copy_frame must
+ not reach AFR_FRAME_INIT with a NULL frame. */
+ if (!transaction_frame)
+ goto out;
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ if (!local->xdata_req)
+ goto out;
- local->success_count++;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->op = GF_FOP_FTRUNCATE;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_ftruncate_unwind;
- call_count = afr_frame_return (frame);
+ local->transaction.main_frame = frame;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
+
+ afr_fix_open (fd, this);
+
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ /* Release the copied frame on failure, as the other inode-write
+ fops in this file do, so it is not leaked. */
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
+/* }}} */
+
+/* {{{ setattr */
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset);
-
- if (!--call_count)
- break;
- }
- }
- return 0;
+/*
+ * Callback for a setattr wound to one child. Forwards every argument
+ * unchanged to __afr_inode_write_cbk and returns its result.
+ */
+int
+afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
}
+/*
+ * Wind the setattr fop to the child at index 'subvol', using the loc,
+ * input iatt, valid mask and request xdata stored in frame->local. The
+ * child index is passed as the cookie to afr_setattr_wind_cbk.
+ * Always returns 0.
+ */
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr,
+ &local->loc, &local->cont.setattr.in_buf,
+ local->cont.setattr.valid, local->xdata_req);
return 0;
}
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(setattr,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->op_ret = -1;
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_SETATTR;
local->transaction.main_frame = frame;
- local->transaction.start = offset;
+ local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-
-/* }}} */
-
-/* {{{ ftruncate */
-
+/* {{{ fsetattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
- }
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
+}
- if (need_unwind)
- local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+/*
+ * Wind the fsetattr fop to the child at index 'subvol', using the fd,
+ * input iatt, valid mask and request xdata stored in frame->local. The
+ * child index is passed as the cookie to afr_fsetattr_wind_cbk.
+ * Always returns 0.
+ */
+int
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr,
+ local->fd, &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid, local->xdata_req);
return 0;
}
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ QUORUM_CHECK(fsetattr,out);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- local->call_count = call_count;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
-
- if (!--call_count)
- break;
- }
- }
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetattr_unwind;
-int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local = frame->local;
+ local->op = GF_FOP_FSETATTR;
- local->transaction.unwind (frame, this);
+ afr_fix_open (fd, this);
- AFR_STACK_DESTROY (frame);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+/* {{{ setxattr */
+
+
int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- local->op = GF_FOP_FTRUNCATE;
+ AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.main_frame = frame;
+/*
+ * Callback for a setxattr wound to one child. This callback receives no
+ * iatt buffers, so NULL is passed for both iatt arguments of
+ * __afr_inode_write_cbk; its return value is returned.
+ */
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
- local->transaction.start = local->cont.ftruncate.offset;
- local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+/*
+ * Wind the setxattr fop to the child at index 'subvol', using the loc,
+ * xattr dict, flags and request xdata stored in frame->local. The child
+ * index is passed as the cookie to afr_setxattr_wind_cbk.
+ * Always returns 0.
+ */
+int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr,
+ &local->loc, local->cont.setxattr.dict,
+ local->cont.setxattr.flags, local->xdata_req);
return 0;
}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ QUORUM_CHECK(setxattr,out);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ local->op = GF_FOP_SETXATTR;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
-/* }}} */
+/* {{{ fsetxattr */
-/* {{{ setattr */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
+
+
+/*
+ * Wind the fsetxattr fop to the child at index 'subvol', using the fd,
+ * xattr dict, flags and request xdata stored in frame->local. The child
+ * index is passed as the cookie to afr_fsetxattr_wind_cbk.
+ * Always returns 0.
+ */
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr,
+ local->fd, local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags, local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- local->success_count++;
+ priv = this->private;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ QUORUM_CHECK(fsetxattr,out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FSETXATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
+/* }}} */
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
- local = frame->local;
- priv = this->private;
+/* {{{ removexattr */
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+int
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local->call_count = call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid);
-
- if (!--call_count)
- break;
- }
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
- local = frame->local;
- local->transaction.unwind (frame, this);
+int
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr,
+ &local->loc, local->cont.removexattr.name,
+ local->xdata_req);
return 0;
}
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- priv = this->private;
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(removexattr,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->op_ret = -1;
+ local->cont.removexattr.name = gf_strdup (name);
- local->cont.setattr.ino = loc->inode->ino;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
- local->transaction.unwind = afr_setattr_unwind;
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_removexattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_REMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
-/* {{{ fsetattr */
-
+/* fremovexattr */
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
+
+
+int
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr,
+ local->fd, local->cont.removexattr.name,
+ local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+int
+afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- local->success_count++;
+ priv = this->private;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ QUORUM_CHECK(fremovexattr, out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->cont.removexattr.name = gf_strdup (name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FREMOVEXATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- local->call_count = call_count;
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
-
- if (!--call_count)
- break;
- }
- }
- return 0;
+int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate,
+ local->fd, local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
return 0;
}
+
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(fallocate,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- local->op_ret = -1;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->cont.fsetattr.ino = fd->inode->ino;
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
+ local->op = GF_FOP_FALLOCATE;
- op_ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fallocate_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
+
+ local->transaction.start = local->cont.fallocate.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/* {{{ setxattr */
+/* }}} */
+/* {{{ discard */
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard,
+ local->fd, local->cont.discard.offset,
+ local->cont.discard.len, local->xdata_req);
return 0;
}
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(discard, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ local = AFR_FRAME_INIT (transaction_frame, op_errno); /* the local must live on the frame passed to afr_transaction() below, not on the caller's frame */
+ if (!local)
+ goto out;
- transaction_frame->local = local;
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- local->op_ret = -1;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_discard_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
+
+ local->transaction.start = local->cont.discard.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/* }}} */
-
-/* {{{ removexattr */
+/* {{{ zerofill */
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp); /* unwind under zerofill's own fop name, not discard's */
return 0;
}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill,
+ local->fd, local->cont.zerofill.offset,
+ local->cont.zerofill.len, local->xdata_req);
return 0;
}
-
int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata) /* len is off_t to agree with the prototype in afr-inode-write.h */
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ int op_errno = ENOMEM;
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ QUORUM_CHECK(zerofill, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (transaction_frame, op_errno); /* the local must live on the frame passed to afr_transaction() below, not on the caller's frame */
+ if (!local)
+ goto out;
- transaction_frame->local = local;
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- local->op_ret = -1;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->cont.removexattr.name = gf_strdup (name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_ZEROFILL;
+
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_zerofill_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local->transaction.start = local->cont.zerofill.offset; /* read back the member this function stored, not discard's */
+ local->transaction.len = len;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno);
+ afr_fix_open (fd, this);
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index f9aa7bd36..7b1fc5528 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 1828ddde7..a2a758f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
@@ -31,8 +22,69 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_out (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_out (frame, this, params); \
+ } while (0);
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
static uint64_t afr_lock_number = 1;
@@ -57,12 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this)
}
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
{
gf_log (this->name, GF_LOG_TRACE,
"Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)frame->root);
- frame->root->lk_owner = (uint64_t) (unsigned long)frame->root;
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
}
static int
@@ -98,16 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -115,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
static void
afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -163,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd,
}
snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -183,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
void
afr_print_entrylk (char *str, int size, const char *basename,
- uint64_t owner)
+ gf_lkowner_t *owner)
{
- snprintf (str, size, "Basename=%s, lk-owner=%llu",
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
basename ? basename : "<nul>",
- (unsigned long long)owner);
+ lkowner_utoa (owner));
}
static void
@@ -241,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int op_ret, int op_errno, int32_t child_index)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
@@ -270,39 +309,31 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lockee,
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
(unsigned long long) int_lock->lock_number);
}
static void
-afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int32_t cmd, int32_t child_index)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
- afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner);
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
@@ -317,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -338,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_entrylk (lock, 256, basename, frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
-afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename, int op_ret,
- int op_errno, int32_t child_index)
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -375,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -441,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -458,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -471,37 +559,37 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
{
- int ret = 0;
-
- ret = strcmp (l1->path, l2->path);
+ int call_count = 0;
+ int i = 0;
- if (ret == 0)
- ret = strcmp (b1, b2);
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
- if (ret <= 0)
- return l1;
- else
- return l2;
+ return call_count;
}
int
@@ -522,7 +610,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
/* FIXME: What if UNLOCK fails */
static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -548,33 +636,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
op_errno, child_index);
+ priv = this->private;
+
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed on %d, reason: %s",
- local->loc.path, child_index, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
}
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
-
- if (op_ret == 1) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
- }
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
@@ -584,10 +676,12 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
int call_count = 0;
int i = 0;
int piggyback = 0;
@@ -598,18 +692,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = F_UNLCK;
-
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data unlock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
full_flock.l_type = F_UNLCK;
-
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -625,11 +715,11 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
fd_ctx = afr_fd_ctx_get (local->fd, this);
for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
continue;
if (local->fd) {
+ flock_use = &flock;
if (!local->transaction.eager_lock[i]) {
goto wind;
}
@@ -641,43 +731,48 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
if (fd_ctx->lock_piggyback[i]) {
fd_ctx->lock_piggyback[i]--;
piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
}
}
UNLOCK (&local->fd->lock);
if (piggyback) {
afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
- fd_ctx->lock_acquired[i]--;
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -689,24 +784,34 @@ out:
static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
local = frame->local;
+ int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"%s: unlock failed on %d, reason: %s",
local->loc.path, child_index, strerror (op_errno));
}
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
}
@@ -714,24 +819,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -741,18 +844,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
break;
@@ -766,15 +874,22 @@ out:
static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
LOCK (&frame->lock);
{
if (op_ret == -1) {
@@ -790,6 +905,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -798,10 +915,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -809,98 +933,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long)cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -908,6 +960,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -918,18 +971,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -937,25 +988,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
struct gf_flock flock = {0,};
uint64_t ctx = 0;
int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -974,42 +1067,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -1022,12 +1099,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1035,11 +1118,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1047,63 +1131,44 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, local->transaction.basename,
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
break;
@@ -1131,11 +1196,11 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
case AFR_ENTRY_TRANSACTION:
+ up_count = AFR_COUNT (local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1147,47 +1212,60 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
static int32_t
afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ call_count = --int_lock->lk_call_count;
}
+ UNLOCK (&frame->lock);
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last locking reply received");
- /* all locks successfull. Proceed to call FOP */
+ /* all locks successful. Proceed to call FOP */
if (int_lock->entrylk_lock_count ==
int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
@@ -1195,7 +1273,7 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1209,42 +1287,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
-{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
@@ -1257,11 +1319,11 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1274,42 +1336,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1319,76 +1391,75 @@ out:
int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
afr_fd_ctx_t *fd_ctx = NULL;
- afr_private_t *priv = NULL;
- priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (priv->eager_lock && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- local->transaction.eager_lock[child_index] = 1;
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
-
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
- /* all locks successfull. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
+ /* all locks successful. Proceed to call FOP */
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1406,30 +1477,29 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = &flock;
- int piggyback = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data lock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
- full_flock.l_type = int_lock->lk_flock.l_type;
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
@@ -1445,11 +1515,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1464,14 +1534,18 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
+ if (!local->child_up[i])
continue;
- if (!priv->eager_lock)
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
goto wind;
+ }
- flock_use = &full_flock;
piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
LOCK (&local->fd->lock);
{
@@ -1485,21 +1559,23 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
if (piggyback) {
/* (op_ret == 1) => indicate piggybacked lock */
afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, flock_use, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
@@ -1512,15 +1588,16 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!local->child_up[i])
continue;
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -1530,200 +1607,6 @@ out:
return ret;
}
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- if (!--call_count)
- break;
-
- }
- }
-
-out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
-}
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -1735,10 +1618,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -1750,485 +1631,37 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
}
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes)
-{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- priv = this->private;
-
- ret = afr_fd_ctx_set (this, fd);
- if (ret)
- goto out;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to get the fd ctx");
- goto out;
- }
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- GF_ASSERT (fdctx->locked_on);
-
- memcpy (fdctx->locked_on, locked_nodes,
- priv->child_count);
-
-out:
- return ret;
-}
-
-static int
-__is_fd_saved (xlator_t *this, fd_t *fd)
-{
- afr_locked_fd_t *locked_fd = NULL;
- afr_private_t *priv = NULL;
- int found = 0;
-
- priv = this->private;
-
- list_for_each_entry (locked_fd, &priv->saved_fds, list) {
- if (locked_fd->fd == fd) {
- found = 1;
- break;
- }
- }
-
- return found;
-}
-
-static int
-__afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- int ret = 0;
-
- priv = this->private;
-
- locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
- gf_afr_mt_locked_fd);
- if (!locked_fd) {
- ret = -1;
- goto out;
- }
-
- locked_fd->fd = fd;
- INIT_LIST_HEAD (&locked_fd->list);
-
- list_add_tail (&locked_fd->list, &priv->saved_fds);
-
-out:
- return ret;
-}
-
-int
-afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->mutex);
- {
- if (__is_fd_saved (this, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p already saved", fd);
- goto unlock;
- }
-
- ret = __afr_save_locked_fd (this, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "fd=%p could not be saved", fd);
- goto unlock;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->mutex);
-
- return ret;
-}
-
-static int
-afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
-
- local = frame->local;
-
- locked_fd = local->locked_fd;
-
- STACK_DESTROY (frame->root);
- afr_local_cleanup (local, this);
-
- afr_save_locked_fd (this, locked_fd->fd);
-
- return 0;
-
-}
-
-static int
-afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
-{
- afr_fd_ctx_t *fdctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t tmp = 0;
- int i = 0;
- int source_child = -1;
- int ret = 0;
-
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fdctx->locked_on[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d", i);
- source_child = i;
- break;
- }
- }
-
-out:
- return source_child;
-
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock);
-int32_t
-afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "lock recovery failed");
- goto cleanup;
- }
-
- source_child = local->source_child;
-
- memcpy (&flock, lock, sizeof (*lock));
-
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-int
-afr_recover_lock (call_frame_t *frame, xlator_t *this,
- struct gf_flock *flock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t lock_recovery_child = 0;
-
- priv = this->private;
- local = frame->local;
-
- lock_recovery_child = local->lock_recovery_child;
-
- frame->root->lk_owner = flock->l_owner;
-
- STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
- (void *) (long) lock_recovery_child,
- priv->children[lock_recovery_child],
- priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock);
-
- return 0;
-}
-
-static int
-is_afr_lock_eol (struct gf_flock *lock)
-{
- int ret = 0;
-
- if ((lock->l_type == GF_LK_EOL))
- ret = 1;
-
- return ret;
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
-{
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to get locks on fd");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Got a lock on fd");
-
- if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_INFO,
- "Reached EOL on locks on fd");
- goto cleanup;
- }
-
- afr_recover_lock (frame, this, lock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
-
- return 0;
-}
-
-static int
-afr_lock_recovery (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- priv = this->private;
- local = frame->local;
-
- fd = local->fd;
-
- source_child = afr_get_source_lock_recovery (this, fd);
- if (source_child < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not recover locks due to lock "
- "split brain");
- ret = -1;
- goto out;
- }
-
- local->source_child = source_child;
-
- /* the flock can be zero filled as we're querying incrementally
- the locks held on the fd.
- */
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
-
-out:
- return ret;
-}
-
-
-static int
-afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- fdctx->opened_on[child_index] = AFR_FD_OPENED;
-
-out:
- return ret;
-}
-
-int32_t
-afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- int32_t child_index = (long )cookie;
- int ret = 0;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Reopen during lock-recovery failed");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Open succeeded => proceed to recover locks");
-
- ret = afr_lock_recovery (frame, this);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Lock recovery failed");
- goto cleanup;
- }
-
- ret = afr_mark_fd_opened (this, fd, child_index);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Marking fd open failed");
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-static int
-afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- uint64_t tmp = 0;
- afr_fd_ctx_t *fdctx = NULL;
- loc_t loc = {0,};
- int32_t child_index = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (local && local->fd);
-
- ret = fd_ctx_get (local->fd, this, &tmp);
- fdctx = (afr_fd_ctx_t *) (long) tmp;
- GF_ASSERT (fdctx);
-
- child_index = local->lock_recovery_child;
-
- inode_path (local->fd->inode, NULL, (char **)&loc.path);
- loc.name = strrchr (loc.path, '/');
- loc.inode = inode_ref (local->fd->inode);
- loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
- (void *)(long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd,
- fdctx->wbflags);
-
- return 0;
-}
-
-static int
-is_fd_opened (fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, THIS, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
- ret = 1;
-
-out:
- return ret;
-}
-
-int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- int ret = 0;
- struct list_head locks_list = {0,};
-
-
- priv = this->private;
-
- if (list_empty (&priv->saved_fds))
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_afr_mt_afr_local_t);
- if (!local) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_INIT (local, priv);
- if (!local) {
- ret = -1;
- goto out;
- }
-
- frame->local = local;
-
- INIT_LIST_HEAD (&locks_list);
-
- pthread_mutex_lock (&priv->mutex);
- {
- list_splice_init (&priv->saved_fds, &locks_list);
- }
- pthread_mutex_unlock (&priv->mutex);
-
- list_for_each_entry_safe (locked_fd, tmp,
- &locks_list, list) {
-
- list_del_init (&locked_fd->list);
-
- local->fd = fd_ref (locked_fd->fd);
- local->lock_recovery_child = child_index;
- local->locked_fd = locked_fd;
-
- if (!is_fd_opened (locked_fd->fd, child_index)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting open before lock "
- "recovery");
- afr_lock_recovery_preopen (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting lock recovery "
- "without a preopen");
- afr_lock_recovery (frame, this);
- }
- }
-
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index ebe189c35..05df90cc0 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,7 +17,6 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
@@ -44,8 +34,15 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
- gf_afr_mt_afr_crawl_data_t,
- gf_afr_mt_afr_brick_pos_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 646d23ccb..f86aa7fd8 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -52,87 +43,31 @@
#include "afr-dir-read.h"
#include "afr-dir-write.h"
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_stale_child_up (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int up = -1;
-
- priv = this->private;
- if (!local->fresh_children)
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
- if (priv->child_count == afr_get_children_count (local->fresh_children,
- priv->child_count))
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- if (afr_is_child_present (local->fresh_children,
- priv->child_count, i))
- continue;
- up = i;
- break;
- }
-out:
- return up;
-}
-void
-afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- int st_child = -1;
- char reason[64] = {0};
-
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- if (!IA_ISREG (inode->ia_type))
- goto out;
-
- st_child = afr_stale_child_up (local, this);
- if (st_child < 0)
- goto out;
-
- sh->do_data_self_heal = _gf_true;
- sh->do_metadata_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_missing_entry_self_heal = _gf_true;
-
- snprintf (reason, sizeof (reason), "stale subvolume %d detected",
- st_child);
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-out:
- return;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
+
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = frame->local;
- afr_private_t *priv = NULL;
- priv = this->private;
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, xdata);
return 0;
}
@@ -140,53 +75,41 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_local_t * local = NULL;
- int ret = 0;
int call_count = -1;
int child_index = (long) cookie;
- afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
LOCK (&frame->lock);
{
if (op_ret == -1) {
local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags,
- local->cont.open.wbflags);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
+ if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
- fd, 0);
+ fd, 0, NULL);
} else {
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd,
+ local->xdata_rsp);
}
}
@@ -195,238 +118,220 @@ unlock:
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
int i = 0;
- int ret = -1;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
- //We can't let truncation to happen outside transaction.
+ afr_fd_ctx_t *fd_ctx = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ //We can't let truncation to happen outside transaction.
priv = this->private;
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- gf_log (this->name, GF_LOG_WARNING,
- "failed to open as split brain seen, returning EIO");
- op_errno = EIO;
- goto out;
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- frame->local = local;
- call_count = local->call_count;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
- local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
+ call_count = local->call_count;
- local->fd = fd_ref (fd);
+ local->cont.open.flags = flags;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
-
+ loc, (flags & ~O_TRUNC), fd, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL);
return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
- }
-}
-
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
- }
-}
-
int
afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
priv = this->private;
local = frame->local;
- call_count = afr_frame_return (frame);
-
- //Note: No frame locking needed for this block of code
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local->fd);
- goto out;
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
}
- fop_paused = local->fop_paused;
+ fd_ctx = local->fd_ctx;
+
LOCK (&local->fd->lock);
{
if (op_ret >= 0) {
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
} else {
- //Change open status from OPENING to NOT OPENED.
fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
}
UNLOCK (&local->fd->lock);
-out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
-done:
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
+
return 0;
}
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
+
+static int
+afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- GF_UNUSED int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return 0;
+
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return count;
+}
+
+
+/*
+ * afr_fix_open:
+ *
+ * Re-open @fd on every child that is up but on which the fd is still
+ * marked AFR_FD_NOT_OPENED. Nothing is done for fds rejected by
+ * afr_is_fd_fixable(). A private frame is created for the winds;
+ * directories are wound to opendir, everything else to open with
+ * O_TRUNC masked out of the saved flags. The frame is destroyed by
+ * afr_openfd_fix_open_cbk once the last reply arrives.
+ *
+ * If setup fails after afr_fd_ctx_need_open() has claimed children
+ * (moved them to AFR_FD_OPENING), the claim is released again so that
+ * a later call can retry them.
+ */
+void
+afr_fix_open (fd_t *fd, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
- local = frame->local;
priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- ALLOC_OR_GOTO (open_local, afr_local_t, out);
- open_frame->local = open_local;
- ret = AFR_LOCAL_INIT (open_local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
- }
- open_local->call_count = need_open_count;
+ if (!afr_is_fd_fixable (fd))
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ need_open = alloca0 (priv->child_count);
+
+ call_count = afr_fd_ctx_need_open (fd, this, need_open);
+ if (!call_count)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+
+ local->call_count = call_count;
gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
- need_open_count);
+ call_count);
for (i = 0; i < priv->child_count; i++) {
- if (need_open[i]) {
+ if (!need_open[i])
+ continue;
+
+ if (IA_IFDIR == fd->inode->ia_type) {
gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for %s on subvolume %s",
+ "opening fd for dir %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, local->fd,
+ NULL);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, fd_ctx->wbflags);
-
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
+
+ if (!--call_count)
+ break;
}
+
+ return;
out:
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ /* This label is only reached before any open has been wound, so
+ no callback will ever move the claimed children out of
+ AFR_FD_OPENING. Put them back to AFR_FD_NOT_OPENED here,
+ otherwise afr_fd_ctx_need_open() would skip them forever.
+ call_count is non-zero only after the claim succeeded, which
+ also guarantees fd_ctx and need_open are valid. */
+ if (call_count > 0) {
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (need_open[i] &&
+ fd_ctx->opened_on[i] == AFR_FD_OPENING)
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&fd->lock);
+ }
+
+ if (frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 000000000..186f68c33
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,239 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+/*
+ * afr_read_txn_next_subvol:
+ *
+ * Scan the children in index order for one that is marked readable and
+ * has not been attempted yet, flag it as attempted and pass it to
+ * local->readfn. Unreadable children met during the scan are flagged
+ * as attempted as well. When no candidate is left, readfn is called
+ * with -1.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *txn    = frame->local;
+        afr_private_t *conf   = this->private;
+        int            idx    = 0;
+        int            chosen = -1;
+
+        while (chosen == -1 && idx < conf->child_count) {
+                if (!txn->readable[idx]) {
+                        /* not worth trying here; just record it as
+                           attempted and keep scanning */
+                        txn->read_attempted[idx] = 1;
+                } else if (!txn->read_attempted[idx]) {
+                        txn->read_attempted[idx] = 1;
+                        chosen = idx;
+                }
+                idx++;
+        }
+
+        /* chosen is still -1 when every readable child has already
+           been attempted; readfn receives that value unchanged as the
+           "out of readable subvols" indication. */
+        txn->readfn (frame, this, chosen);
+
+        return 0;
+}
+
+
+/*
+ * afr_read_txn_refresh_done:
+ *
+ * Callback handed to afr_inode_refresh() by the read transaction.
+ *
+ * @err: non-zero when the refresh failed; its negation is stored in
+ *       local->op_errno.
+ *
+ * On refresh failure, or when no read subvolume can be determined
+ * afterwards (EIO), local->op_ret is set to -1 and local->readfn is
+ * called with -1. Otherwise the subvolume picked by policy is handed
+ * to readfn, unless it was already attempted, in which case the
+ * remaining candidates are tried via afr_read_txn_next_subvol().
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+        afr_local_t *txn        = frame->local;
+        inode_t     *ino        = txn->inode;
+        int          generation = 0;
+        int          chosen     = -1;
+        int          failure    = 0;
+
+        if (err) {
+                failure = -err;
+        } else if (afr_inode_read_subvol_type_get (ino, this, txn->readable,
+                                                   &generation,
+                                                   txn->transaction.type) == -1
+                   || !generation) {
+                /* even after the refresh there is no good read
+                   subvolume: give up */
+                failure = EIO;
+        } else {
+                chosen = afr_read_subvol_select_by_policy (ino, this,
+                                                           txn->readable);
+                if (chosen == -1)
+                        failure = EIO;
+        }
+
+        if (failure) {
+                txn->op_ret   = -1;
+                txn->op_errno = failure;
+                txn->readfn (frame, this, -1);
+                return 0;
+        }
+
+        if (txn->read_attempted[chosen]) {
+                /* the preferred subvolume was already tried; fall back
+                   to whatever else is readable */
+                afr_read_txn_next_subvol (frame, this);
+                return 0;
+        }
+
+        txn->read_attempted[chosen] = 1;
+        txn->readfn (frame, this, chosen);
+
+        return 0;
+}
+
+
+/*
+ * afr_read_txn_continue:
+ *
+ * Move a read transaction on to its next step. The first call on a
+ * frame sets local->refreshed and starts afr_inode_refresh() with
+ * afr_read_txn_refresh_done as the callback; later calls go straight
+ * to afr_read_txn_next_subvol(). @subvol is not used here.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
+{
+        afr_local_t *txn = frame->local;
+
+        if (txn->refreshed) {
+                afr_read_txn_next_subvol (frame, this);
+                return 0;
+        }
+
+        /* mark before kicking off the refresh so it happens only once */
+        txn->refreshed = _gf_true;
+        afr_inode_refresh (frame, this, txn->inode,
+                           afr_read_txn_refresh_done);
+
+        return 0;
+}
+
+
+/* afr_read_txn_wipe:
+
+   clean internal variables in @local in order to make
+   it possible to call afr_read_txn() multiple times from
+   the same frame
+
+   Clears local->readfn, drops the reference held through
+   local->inode (and resets the pointer), and zeroes the
+   read_attempted[] and readable[] flags of every child.
+*/
+
+void
+afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+        int            i     = 0;
+
+        local = frame->local;
+        priv = this->private;
+
+        local->readfn = NULL;
+
+        if (local->inode) {
+                inode_unref (local->inode);
+                /* our reference is gone: do not leave a stale pointer
+                   behind that a second wipe or a later cleanup of
+                   @local could unref again */
+                local->inode = NULL;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                local->read_attempted[i] = 0;
+                local->readable[i] = 0;
+        }
+}
+
+
+/*
+   afr_read_txn:
+
+   This is the read transaction function. The way it works:
+
+   - Determine read-subvolume from inode ctx.
+
+   - If read-subvolume's generation was stale, refresh ctx once by
+     calling afr_inode_refresh()
+
+     Else make an attempt to read on read-subvolume.
+
+   - If attempted read on read-subvolume fails, refresh ctx once
+     by calling afr_inode_refresh()
+
+   - After ctx refresh, query read-subvolume freshly and attempt
+     read once.
+
+   - If read fails, try every other readable[] subvolume before
+     finally giving up. readable[] elements are set by afr_inode_refresh()
+     based on dirty and pending flags.
+
+   - If file is in split brain in the backend, generation will be
+     kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+     all elements. Therefore reads always fail.
+
+   @frame:  frame whose ->local carries the transaction state; any
+            state from an earlier read transaction is wiped first.
+   @inode:  inode to read from; a reference is taken into local->inode.
+   @readfn: called as readfn (frame, this, subvol) with the subvolume
+            to read from.
+   @type:   transaction type, stored in local->transaction.type and
+            used when querying the readable subvolumes.
+
+   Always returns 0.
+*/
+
+int
+afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+              afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+        afr_local_t   *local            = NULL;
+        afr_private_t *priv             = NULL;
+        int            read_subvol      = -1;
+        int            event_generation = 0;
+        int            ret              = -1;
+
+        priv = this->private;
+        local = frame->local;
+
+        afr_read_txn_wipe (frame, this);
+
+        local->readfn = readfn;
+        local->inode = inode_ref (inode);
+
+        local->transaction.type = type;
+        ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
+                                              &event_generation, type);
+        if (ret == -1)
+                /* very first transaction on this inode */
+                goto refresh;
+
+        if (local->event_generation != event_generation)
+                /* servers have disconnected / reconnected, and possibly
+                   rebooted, very likely changing the state of freshness
+                   of copies */
+                goto refresh;
+
+        read_subvol = afr_read_subvol_select_by_policy (inode, this,
+                                                        local->readable);
+
+        /* Valid indices are 0 .. child_count - 1. An index equal to
+           child_count must be rejected too, since it is used below to
+           index the per-child arrays. */
+        if (read_subvol < 0 || read_subvol >= priv->child_count) {
+                gf_log (this->name, GF_LOG_WARNING, "Unreadable subvolume %d "
+                        "found with event generation %d", read_subvol,
+                        event_generation);
+                goto refresh;
+        }
+
+        if (!local->child_up[read_subvol]) {
+                /* should never happen, just in case */
+                gf_log (this->name, GF_LOG_WARNING, "subvolume %d is the "
+                        "read subvolume in this generation, but is not up",
+                        read_subvol);
+                goto refresh;
+        }
+
+        local->read_attempted[read_subvol] = 1;
+
+        local->readfn (frame, this, read_subvol);
+
+        return 0;
+
+refresh:
+        afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done);
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index 4dfb85824..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "md5.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno);
-static int
-sh_destroy_frame (call_frame_t *frame, xlator_t *this)
-{
- if (!frame)
- goto out;
-
- AFR_STACK_DESTROY (frame);
-out:
- return 0;
-}
-
-static void
-sh_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
-}
-
-static int
-sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *last_loop_frame)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- if (sh_priv) {
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
- }
-
- sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
- GF_ASSERT (!last_loop_frame);
- //loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_INFO,
- "self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (sh_frame, this);
- } else {
- GF_ASSERT (last_loop_frame);
- if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_INFO, "full self-heal "
- "completed on %s",local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
- }
-
- if (sh_frame == last_loop_frame)
- sh->old_loop_frame = NULL;
- else
- sh->old_loop_frame = last_loop_frame;
- local->self_heal.algo_completion_cbk (sh_frame, this);
- }
-
- return 0;
-}
-
-int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- if (!loop_frame)
- goto out;
-
- loop_local = loop_frame->local;
- if (loop_local) {
- loop_sh = &loop_local->self_heal;
- }
-
- if (loop_sh && loop_sh->loop_completion_cbk) {
- if (loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
- loop_sh->loop_completion_cbk);
- } else {
- loop_sh->loop_completion_cbk (loop_frame, this);
- }
- } else {
- //default loop_completion_cbk destroys the loop_frame
- if (loop_sh && !loop_sh->loop_completion_cbk)
- GF_ASSERT (!loop_sh->data_lock_held);
- sh_destroy_frame (loop_frame, this);
- }
-out:
- return 0;
-}
-
-static int
-sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- loop_sh->data_lock_held = _gf_true;
- loop_sh->sh_data_algo_start (loop_frame, this);
- return 0;
-}
-
-static int
-sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
-{
- call_frame_t *sh_frame = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
- sh_frame = loop_sh->sh_frame;
-
- gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- if (loop_sh->old_loop_frame != loop_sh->sh_frame)
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
- sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
- return 0;
-}
-
-static int
-sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
- call_frame_t *old_loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- afr_private_t *priv = NULL;
-
- GF_ASSERT (sh_frame);
-
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- new_loop_frame = copy_frame (sh_frame);
- if (!new_loop_frame)
- goto out;
- //We want the frame to have same lk_oner as sh_frame
- new_loop_local = afr_local_copy (local, this);
- if (!new_loop_local)
- goto out;
- new_loop_frame->local = new_loop_local;
-
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->sources = memdup (sh->sources,
- priv->child_count * sizeof (*sh->sources));
- if (!new_loop_sh->sources)
- goto out;
- new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*new_loop_sh->write_needed),
- gf_afr_mt_char);
- if (!new_loop_sh->write_needed)
- goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN,
- gf_afr_mt_uint8_t);
- if (!new_loop_sh->checksum)
- goto out;
- new_loop_sh->offset = offset;
- new_loop_sh->block_size = sh->block_size;
- new_loop_sh->inode = inode_ref (sh->inode);
- new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
- new_loop_sh->source = sh->source;
- new_loop_sh->active_sinks = sh->active_sinks;
- new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
- new_loop_sh->file_has_holes = sh->file_has_holes;
- new_loop_sh->loop_completion_cbk = sh_destroy_frame;
- new_loop_sh->old_loop_frame = old_loop_frame;
- new_loop_sh->sh_frame = sh_frame;
- afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- sh_loop_lock_success, sh_loop_lock_failure);
- return 0;
-out:
- sh->op_failed = 1;
- if (new_loop_frame) {
- new_loop_frame->local = new_loop_local;
- }
- if (old_loop_frame != sh_frame)
- sh_loop_finish (old_loop_frame, this);
- sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
- return 0;
-}
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
-
- priv = this->private;
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((!sh->eof_reached) && (0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- sh_priv->offset += block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (0 == loop) {
- //loop finish does unlock, but the erasing of the pending
- //xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !sh->op_failed)
- goto driver_done;
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- }
-
- //If we have more loops to form we should finish previous loop after
- //the next loop lock
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- sh_loop_driver (sh_frame, this, _gf_false, NULL);
- } else {
- gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
- "for offset %"PRId64, offset);
-
- sh_loop_start (sh_frame, this, offset, old_loop_frame);
- old_loop_frame = NULL;
- offset += block_size;
- }
- }
-
-driver_done:
- if (is_driver_done) {
- sh_loop_driver_done (sh_frame, this, old_loop_frame);
- }
- return 0;
-}
-
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (loop_frame) {
- GF_ASSERT (loop_frame != sh_frame);
- loop_local = loop_frame->local;
- if (loop_local)
- loop_sh = &loop_local->self_heal;
- if (loop_sh)
- gf_log (this->name, GF_LOG_TRACE, "loop for offset "
- "%"PRId64" returned", loop_sh->offset);
- }
-
- if (op_ret == -1) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- if (loop_frame) {
- sh_loop_finish (loop_frame, this);
- loop_frame = NULL;
- }
- }
-
- sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
-
- return 0;
-}
-
-static int
-sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index, loop_sh->offset);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- afr_sh_set_error (loop_sh, op_errno);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame,
- loop_sh->op_ret, loop_sh->op_errno);
- }
-
- return 0;
-}
-
-
-static int
-sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- int i = 0;
- int call_count = 0;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, loop_local->loc.path, loop_sh->offset);
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- sh->op_failed = 1;
- gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
- "for %s reason :%s", sh->source,
- sh_local->loc.path, strerror (errno));
- } else {
- sh->eof_reached = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
- sh_local->loc.path);
- }
- sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
- goto out;
- }
-
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) {
- gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- goto out;
- }
-
- call_count = sh_number_of_writes_needed (loop_sh->write_needed,
- priv->child_count);
- GF_ASSERT (call_count > 0);
- loop_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!loop_sh->write_needed[i])
- continue;
- STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- loop_sh->healing_fd, vector, count,
- loop_sh->offset, iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->readv,
- loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = (long) cookie;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum, MD5_DIGEST_LEN);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_sh->offset);
-
- write_needed = loop_sh->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed && !sh->op_failed) {
- sh_loop_read (loop_frame, this);
- } else {
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- }
- }
-
- return 0;
-}
-
-static int
-sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- call_count = loop_sh->active_sinks + 1; /* sinks and source */
-
- loop_local->call_count = call_count;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-static int
-sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
- loop_sh->write_needed[i] = 1;
- }
- sh_loop_read (loop_frame, this);
- return 0;
-}
-
-static int
-sh_do_nothing (call_frame_t *frame, xlator_t *this)
-{
- return 0;
-}
-
-int
-afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
- afr_sh_algo_fn sh_data_algo_start)
-{
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv) {
- sh->op_failed = 1;
- sh_loop_driver_done (sh_frame, this, NULL);
- goto out;
- }
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
- sh->sh_data_algo_start = sh_data_algo_start;
-
- sh_local->call_count = 0;
-
- sh->loop_completion_cbk = sh_do_nothing;
- sh_loop_driver (sh_frame, this, _gf_true, sh_frame);
-out:
- return 0;
-}
-
-int
-afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
- return 0;
-}
-
-int
-afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
- return 0;
-}
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 04d8e8a6c..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-} afr_sh_algo_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b11be3872..4dac83113 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,2300 +1,1009 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-#include "pump.h"
-
-//Intersection[child]=1 if child is part of intersection
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count)
-{
- int i = 0;
-
- memset (intersection, 0, sizeof (*intersection) * child_count);
- for (i = 0; i < child_count; i++) {
- intersection[i] = afr_is_child_present (set1, child_count, i)
- && afr_is_child_present (set2, child_count,
- i);
- }
-}
+#include "byte-order.h"
-/**
- * select_source - select a source and return it
- */
+/*
+ * Reply handler for the xattrop wound by afr_selfheal_post_op(). The reply
+ * payload is not examined; the handler only wakes the barrier kept in
+ * frame->local and returns 0.
+ */
 int
-afr_sh_select_source (int sources[], int child_count)
+afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
 {
- int i = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
- return -1;
-}
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
- sh->success[i] = 1;
- }
- }
- sh->active_sinks = active_sinks;
+        afr_local_t *waiter = frame->local;
+
+        syncbarrier_wake (&waiter->barrier);
+
+        return 0;
 }
-/**
- * sink_count - return number of sinks in sources array
- */
+/*
+ * Wind a GF_XATTROP_ADD_ARRAY xattrop carrying xattr to child number subvol
+ * for the given inode (addressed by gfid), then wait on local->barrier for
+ * the single wake-up issued by afr_selfheal_post_op_cbk().
+ *
+ * Always returns 0; the result of the xattrop itself is not inspected.
+ */
 int
-afr_sh_sink_count (int sources[], int child_count)
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+                      int subvol, dict_t *xattr)
 {
- int i = 0;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
-}
-int
-afr_sh_source_count (int sources[], int child_count)
-{
- int i = 0;
- int nsource = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
-}
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
-{
- sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
-}
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
-{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
-
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
-
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG,
- "pending_matrix: %s", buf);
- }
-
- GF_FREE (buf);
+        afr_private_t *priv = NULL;
+        afr_local_t   *local = NULL;
+        loc_t          loc = {0, };
+
+        priv = this->private;
+        local = frame->local;
+
+        loc.inode = inode_ref (inode);
+        uuid_copy (loc.gfid, inode->gfid);
+
+        STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+                    priv->children[subvol]->fops->xattrop, &loc,
+                    GF_XATTROP_ADD_ARRAY, xattr, NULL);
+
+        syncbarrier_wait (&local->barrier, 1);
+
+        /* The callback has run, so the stack-local loc is no longer in
+           use: release the inode reference taken above, which would
+           otherwise leak on every post-op. */
+        loc_wipe (&loc);
+
+        return 0;
 }
-void
-afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+
+/*
+ * Build the xattrop request dict for child number subvol.
+ *
+ * A changelog array of AFR_NUM_CHANGE_LOGS ints is stored under AFR_DIRTY
+ * when output_dirty[subvol] is non-zero, and under priv->pending_key[j] for
+ * every j whose output_matrix[subvol][j] is non-zero. In each array only the
+ * slot selected by the transaction type is filled (network byte order).
+ *
+ * Returns the new dict, or NULL when an allocation or dict insertion fails.
+ */
+dict_t *
+afr_selfheal_output_xattr (xlator_t *this, afr_transaction_type type,
+                           int *output_dirty, int **output_matrix, int subvol)
 {
- int i = 0;
- int j = 0;
- GF_ASSERT (pending_matrix);
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
-}
-void
-afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- size_t child_count)
-{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
- GF_ASSERT (ignorant_subvols);
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
+        afr_private_t *priv = this->private;
+        dict_t        *xattr = NULL;
+        int           *raw = NULL;
+        int            slot = afr_index_for_transaction_type (type);
+        int            peer = 0;
+
+        xattr = dict_new ();
+        if (!xattr)
+                return NULL;
+
+        /* NOTE(review): when dict_set_bin() fails below, raw is not freed
+           here; whether the dict layer releases it in that case is not
+           visible from this file - confirm before adding a free. */
+
+        if (output_dirty[subvol]) {
+                /* clear dirty */
+                raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
+                                 gf_afr_mt_int32_t);
+                if (!raw)
+                        goto err;
+
+                raw[slot] = hton32 (output_dirty[subvol]);
+                if (dict_set_bin (xattr, AFR_DIRTY, raw,
+                                  sizeof(int) * AFR_NUM_CHANGE_LOGS))
+                        goto err;
+        }
+
+        /* clear/set pending */
+        for (peer = 0; peer < priv->child_count; peer++) {
+                if (output_matrix[subvol][peer]) {
+                        raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
+                                         gf_afr_mt_int32_t);
+                        if (!raw)
+                                goto err;
+
+                        raw[slot] = hton32 (output_matrix[subvol][peer]);
+
+                        if (dict_set_bin (xattr, priv->pending_key[peer], raw,
+                                          sizeof(int) * AFR_NUM_CHANGE_LOGS))
+                                goto err;
+                }
+        }
+
+        return xattr;
+err:
+        if (xattr)
+                dict_unref (xattr);
+        return NULL;
 }
+
int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
- unsigned char *ignorant_subvols = NULL;
-
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
-
- afr_init_pending_matrix (pending_matrix, child_count);
-
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- ignorant_subvols[i] = 1;
- continue;
- }
-
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
-
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- child_count);
- GF_FREE (ignorant_subvols);
-out:
- return ret;
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ pending = alloca0 (priv->child_count);
+
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j])
+ output_matrix[i][j] = 1;
+ else
+ output_matrix[i][j] = -input_matrix[i][j];
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+
+ xattr = afr_selfheal_output_xattr (this, type, output_dirty,
+ output_matrix, i);
+ if (!xattr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to allocate xdata for subvol %d", i);
+ continue;
+ }
+
+ afr_selfheal_post_op (frame, this, inode, i, xattr);
+
+ dict_unref (xattr);
+ }
+
+ return 0;
}
-typedef enum {
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE,
- AFR_NODE_INVALID = -1,
-} afr_node_type;
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+/*
+ * Copy count reply slots from src to dst, field by field. The xdata dict is
+ * shared by reference: dst receives a new reference to src's dict (or NULL)
+ * and whatever dict dst held before is unreferenced. Nothing is done when
+ * dst and src are the same array.
+ */
+void
+afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count)
+{
+        struct afr_reply *to = NULL;
+        struct afr_reply *from = NULL;
+        dict_t           *shared = NULL;
+        int               n = 0;
+
+        if (dst == src)
+                return;
+
+        for (n = 0; n < count; n++) {
+                to = &dst[n];
+                from = &src[n];
+
+                to->valid = from->valid;
+                to->op_ret = from->op_ret;
+                to->op_errno = from->op_errno;
+                to->prestat = from->prestat;
+                to->poststat = from->poststat;
+                to->preparent = from->preparent;
+                to->postparent = from->postparent;
+                to->preparent2 = from->preparent2;
+                to->postparent2 = from->postparent2;
+
+                /* Reference the source dict before releasing the
+                   destination's current one, so a dict held by both
+                   slots is never released while still needed. */
+                shared = from->xdata ? dict_ref (from->xdata) : NULL;
+                if (to->xdata)
+                        dict_unref (to->xdata);
+                to->xdata = shared;
+
+                memcpy (to->checksum, from->checksum,
+                        MD5_DIGEST_LENGTH);
+        }
+}
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
+/*
+ * Look up the AFR_DIRTY xattr in xdata and store its idx'th counter,
+ * converted from network byte order, in dirty[subvol].
+ *
+ * Returns 0 on success, -1 when the key is missing or has no value.
+ */
+int
+afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol,
+                         int idx, dict_t *xdata)
 {
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
- }
- return ret;
-}
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
-{
- return array[i]; /* fool if accuses itself */
+        int   counters[3] = {0, };
+        void *blob = NULL;
+
+        if (dict_get_ptr (xdata, AFR_DIRTY, &blob) != 0 || blob == NULL)
+                return -1;
+
+        /* copies sizeof(counters) bytes: assumes the stored value holds
+           at least three ints - TODO confirm against the writer side */
+        memcpy (counters, blob, sizeof(counters));
+
+        dirty[subvol] = ntoh32 (counters[idx]);
+
+        return 0;
 }
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
+/*
+ * Fill row subvol of matrix from the pending xattrs found in xdata: for each
+ * child i whose priv->pending_key[i] is present with a value, the idx'th
+ * counter (network byte order on the wire) is stored in matrix[subvol][i].
+ * Entries for keys that are absent are left untouched. Always returns 0.
+ */
+int
+afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol,
+                          int idx, dict_t *xdata)
 {
- return !array[i]; /* wise if does not accuse itself */
-}
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
- return ret;
+        afr_private_t *priv = this->private;
+        int            counters[3] = {0, };
+        void          *blob = NULL;
+        int            peer = 0;
+
+        for (peer = 0; peer < priv->child_count; peer++) {
+                if (dict_get_ptr (xdata, priv->pending_key[peer], &blob) == 0
+                    && blob != NULL) {
+                        memcpy (counters, blob, sizeof(counters));
+
+                        matrix[subvol][peer] = ntoh32 (counters[idx]);
+                }
+        }
+
+        return 0;
 }
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
+/*
+ * Decode the changelog xattrs carried in replies into dirty[] and matrix[][]
+ * for the given transaction type. Children whose reply has no xdata are
+ * skipped, so their entries keep whatever the caller initialised them to.
+ * Always returns 0.
+ */
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+                            afr_transaction_type type, int *dirty, int **matrix)
 {
- int i = 0;
- int ret = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
- }
- }
- return ret;
+        afr_private_t *priv = this->private;
+        int            slot = afr_index_for_transaction_type (type);
+        int            child = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (replies[child].xdata) {
+                        afr_selfheal_fill_dirty (this, dirty, child, slot,
+                                                 replies[child].xdata);
+                        afr_selfheal_fill_matrix (this, matrix, child, slot,
+                                                  replies[child].xdata);
+                }
+        }
+
+        return 0;
 }
+
/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
+ *
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
+ *
+ * The function reports its result by filling the arrays sources[] and sinks[].
+ *
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
+ *
+ * sinks[i] is set if i'th server needs to be healed.
+ *
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
*
- * If no nodes with wisdom 1 exist, a split-brain has occured.
*/
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
-
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
-
- characters[i].wisdom = 0;
- }
- }
- }
- }
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+                             struct afr_reply *replies,
+                             afr_transaction_type type, unsigned char *locked_on,
+                             unsigned char *sources, unsigned char *sinks)
+{
+        afr_private_t *priv = this->private;
+        int           *dirty = NULL;
+        int          **matrix = NULL;
+        char          *accused = NULL;
+        int            row = 0;
+        int            col = 0;
+
+        dirty = alloca0 (priv->child_count * sizeof (int));
+        accused = alloca0 (priv->child_count);
+        matrix = ALLOC_MATRIX(priv->child_count, int);
+
+        /* Step 1: build the pending matrix and dirty vector from replies */
+        afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
+
+        /* Step 2: a child is "accused" when any row (its own included)
+           records pending operations against it */
+        for (row = 0; row < priv->child_count; row++) {
+                for (col = 0; col < priv->child_count; col++) {
+                        if (matrix[row][col])
+                                accused[col] = 1;
+                }
+        }
+
+        /* Step 3: locked children that nobody accuses become sources */
+        memset (sources, 0, priv->child_count);
+        for (row = 0; row < priv->child_count; row++) {
+                if (locked_on[row] && !accused[row])
+                        sources[row] = 1;
+        }
+
+        /* Step 4: whoever a source accuses is a sink */
+        memset (sinks, 0, priv->child_count);
+        for (row = 0; row < priv->child_count; row++) {
+                if (sources[row]) {
+                        for (col = 0; col < priv->child_count; col++) {
+                                if (matrix[row][col])
+                                        sinks[col] = 1;
+                        }
+                }
+        }
+
+        /* Step 5: the first source carrying a 'dirty' count becomes the
+           only source; every other child is turned into a sink */
+        for (row = 0; row < priv->child_count; row++) {
+                if (!(sources[row] && dirty[row]))
+                        continue;
+
+                for (col = 0; col < priv->child_count; col++) {
+                        if (col == row)
+                                continue;
+                        sources[col] = 0;
+                        sinks[col] = 1;
+                }
+                break;
+        }
+
+        /* Step 6: with no source left, all locked nodes are sinks -
+           split brain */
+        if (AFR_COUNT (sources, priv->child_count) == 0) {
+                for (row = 0; row < priv->child_count; row++) {
+                        if (locked_on[row])
+                                sinks[row] = 1;
+                }
+        }
+
+        return 0;
 }
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
+/*
+ * Lookup reply handler used by the unlocked lookup/discover helpers. The
+ * cookie carries the child index; the reply is recorded in
+ * local->replies[index] (a reference is taken on xdata when present) and
+ * the barrier in frame->local is woken. Always returns 0.
+ */
+int
+afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int op_ret, int op_errno, inode_t *inode,
+                           struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
 {
- int i = 0;
- int ret = 1;
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
- }
- return ret;
+        afr_local_t      *local = frame->local;
+        int               child = (long) cookie;
+        struct afr_reply *reply = &local->replies[child];
+
+        reply->valid = 1;
+        reply->op_ret = op_ret;
+        reply->op_errno = op_errno;
+
+        /* each of these may be absent from the reply */
+        if (buf)
+                reply->poststat = *buf;
+        if (parbuf)
+                reply->postparent = *parbuf;
+        if (xdata)
+                reply->xdata = dict_ref (xdata);
+
+        syncbarrier_wake (&local->barrier);
+
+        return 0;
 }
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
+/*
+ * Look up name under parent on every child set in lookup_on, without taking
+ * locks in this function, using a freshly created inode. The per-child
+ * replies gathered in local->replies are copied into the caller's replies
+ * array.
+ *
+ * Returns the new inode, or NULL when the request dict or the inode could
+ * not be set up (no lookup is sent in that case).
+ */
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+                                 const char *name, struct afr_reply *replies,
+                                 unsigned char *lookup_on)
 {
- int nsources = 0;
- int i = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
-}
-static void
-afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int j = 0;
- int witness = 0;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (pending_matrix);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- witness = 0;
- for (j = 0; j < child_count; j++) {
- if (i == j)
- continue;
- witness += pending_matrix[i][j];
- }
- witnesses[i] = witness;
- }
-}
-static int32_t
-afr_find_biggest_witness_among_fools (int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int biggest_witness = -1;
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
- if (biggest_witness < witnesses[i])
- biggest_witness = witnesses[i];
- }
- return biggest_witness;
-}
-int
-afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count, int32_t witness)
-{
- int i = 0;
- int nsources = 0;
-
- GF_ASSERT (sources);
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- if (witness == witnesses[i]) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
-}
-static int
-afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
- afr_node_character *characters,
- int child_count)
-{
- int32_t biggest_witness = 0;
- int nsources = 0;
- int32_t *witnesses = NULL;
-
- GF_ASSERT (child_count > 0);
-
- witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
- gf_afr_mt_int32_t);
- if (NULL == witnesses) {
- nsources = -1;
- goto out;
- }
-
- afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
- child_count);
- biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
- characters,
- child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
-out:
- if (witnesses)
- GF_FREE (witnesses);
- return nsources;
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv = frame->this->private;
+        dict_t        *xattr_req = NULL;
+        inode_t       *inode = NULL;
+        loc_t          loc = {0, };
+
+        xattr_req = dict_new ();
+        if (!xattr_req)
+                return NULL;
+
+        if (afr_xattr_req_prepare (frame->this, xattr_req) != 0)
+                goto discard;
+
+        inode = inode_new (parent->table);
+        if (!inode)
+                goto discard;
+
+        /* loc holds its own references; they are dropped by loc_wipe()
+           below, while the inode returned to the caller stays valid */
+        loc.parent = inode_ref (parent);
+        uuid_copy (loc.pargfid, parent->gfid);
+        loc.name = name;
+        loc.inode = inode_ref (inode);
+
+        AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+                    xattr_req);
+
+        afr_replies_copy (replies, local->replies, priv->child_count);
+
+        loc_wipe (&loc);
+        dict_unref (xattr_req);
+
+        return inode;
+
+discard:
+        /* set-up failed before any lookup was sent */
+        dict_destroy (xattr_req);
+        return NULL;
 }
-int
-afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *valid_children, int child_count,
- uint32_t uid)
-{
- int i = 0;
- int nsources = 0;
- int child = 0;
-
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (sources);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
-
- child = valid_children[i];
- if (uid == bufs[child].ia_uid) {
- sources[child] = 1;
- nsources++;
- }
- }
- return nsources;
-}
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
- int child_count)
+afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on)
{
- int i = 0;
- int smallest = -1;
- int child = 0;
-
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
- child = valid_children[i];
- if ((smallest == -1) ||
- (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
- smallest = child;
- }
- }
- return smallest;
-}
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
-static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int smallest = 0;
-
- smallest = afr_get_child_with_lowest_uid (bufs, valid_children,
- child_count);
- if (smallest < 0) {
- nsources = -1;
- goto out;
- }
- nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- valid_children, child_count,
- bufs[smallest].ia_uid);
-out:
- return nsources;
-}
+ local = frame->local;
+ priv = frame->this->private;
-char *
-afr_get_character_str (afr_node_type type)
-{
- char *character = NULL;
-
- switch (type) {
- case AFR_NODE_INNOCENT:
- character = "innocent";
- break;
- case AFR_NODE_FOOL:
- character = "fool";
- break;
- case AFR_NODE_WISE:
- character = "wise";
- break;
- default:
- character = "invalid";
- break;
- }
- return character;
-}
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ return -ENOMEM;
-afr_node_type
-afr_find_child_character_type (int32_t *pending_row, int32_t child,
- int32_t child_count, const char *xlator_name)
-{
- afr_node_type type = AFR_NODE_INVALID;
-
- GF_ASSERT (pending_row);
- GF_ASSERT (child_count > 0);
- GF_ASSERT ((child >= 0) && (child < child_count));
-
- if (afr_sh_is_innocent (pending_row, child_count))
- type = AFR_NODE_INNOCENT;
- else if (afr_sh_is_fool (pending_row, child, child_count))
- type = AFR_NODE_FOOL;
- else if (afr_sh_is_wise (pending_row, child, child_count))
- type = AFR_NODE_WISE;
- else
- GF_ASSERT (0);
-
- gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s",
- child, afr_get_character_str (type));
- return type;
-}
-
-int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
- int nsources = -1;
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
+ dict_destroy (xattr_req);
+ return -ENOMEM;
+ }
- priv = this->private;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, gfid);
- if (afr_get_children_count (success_children, priv->child_count) == 0)
- goto out;
+ AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, type, priv->child_count);
+ afr_replies_copy (replies, local->replies, priv->child_count);
- sh_type = afr_self_heal_type_for_transaction (type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
- afr_sh_print_pending_matrix (pending_matrix, this);
-
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name);
-out:
- return nsources;
+ return 0;
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name)
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies)
{
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
-
- afr_node_character *characters = NULL;
- int i = 0;
- int nsources = -1;
- xlator_t *this = NULL;
-
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count, gf_afr_mt_afr_node_character);
- if (!characters)
- goto out;
-
- this = THIS;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
- nsources = 0;
- for (i = 0; i < child_count; i++) {
- characters[i].type =
- afr_find_child_character_type (pending_matrix[i], i,
- child_count,
- xlator_name);
- if (AFR_NODE_INVALID == characters[i].type)
- gf_log (xlator_name, GF_LOG_WARNING,
- "child %d had invalid xattrs", i);
- }
-
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- valid_children,
- child_count,
- sources);
- goto out;
- }
-
- if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
-
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
- gf_log (this->name, GF_LOG_INFO,
- "split-brain possible, no source detected");
- nsources = -1;
-
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
- }
- } else {
- nsources = afr_mark_biggest_of_fools_as_source (sources,
- pending_matrix,
- characters,
- child_count);
- }
+ afr_private_t *priv = NULL;
-out:
- if (nsources == 0) {
- for (i = 0; i < child_count; i++) {
- if (valid_children[i] != -1)
- sources[valid_children[i]] = 1;
- }
- }
- if (characters)
- GF_FREE (characters);
-
- gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
- return nsources;
-}
+ priv = frame->this->private;
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
- int k = 0;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
-
- for (i = 0; i < child_count; i++) {
- if (pending_raw)
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Unable to get dict value.");
- if (!success[j])
- continue;
-
- k = afr_index_for_transaction_type (type);
-
- if (pending_raw != NULL) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
- }
-
- }
- }
+ return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies,
+ priv->child_up);
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
-
- if (!pending)
- continue;
- /* 3 = data+metadata+entry */
-
- k = afr_index_for_transaction_type (type);
-
- pending[k] = hton32 (delta_matrix[i][j]);
-
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- }
- }
- return 0;
-}
+ afr_local_t *local = NULL;
+ int i = 0;
+ local = frame->local;
+ i = (long) cookie;
-int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- priv = this->private;
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
- if (ret != 0)
- return 0;
+int
+afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ local = frame->local;
+ priv = this->private;
- if (pending[j])
- return 1;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
+ }
+ }
- return 0;
+ return count;
}
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
- priv = this->private;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- if (ret != 0)
- return 0;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ loc_wipe (&loc);
- if (pending[j])
- return 1;
- }
-
- return 0;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- if (ret != 0)
- return 0;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- if (pending[j])
- return 1;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on);
- return 0;
-}
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL);
+ break;
+ }
+ }
+ loc_wipe (&loc);
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda || sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "split brain found, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
-
- return 0;
-}
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
-static int
-afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- int_lock = &local->internal_lock;
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL);
- return 0;
-}
+ loc_wipe (&loc);
-int
-afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
-{
- int ret = -ENOMEM;
- sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
- gf_afr_mt_iatt);
- if (!sh->buf)
- goto out;
- sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
- gf_afr_mt_iatt);
- if (!sh->parentbufs)
- goto out;
- sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
- gf_afr_mt_int);
- if (!sh->child_errno)
- goto out;
- sh->success_children = afr_children_create (child_count);
- if (!sh->success_children)
- goto out;
- sh->fresh_children = afr_children_create (child_count);
- if (!sh->fresh_children)
- goto out;
- sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
- gf_afr_mt_dict_t);
- if (!sh->xattr)
- goto out;
- ret = 0;
-out:
- return ret;
+ return 0;
}
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- sh->buf[child_index] = *buf;
- sh->parentbufs[child_index] = *postparent;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_ERROR, "path %s on subvolume"
- " %s => -1 (%s)", loc->path,
- priv->children[child_index]->name,
- strerror (op_errno));
- local->self_heal.child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
- return;
-}
-
-gf_boolean_t
-afr_valid_ia_type (ia_type_t ia_type)
-{
- switch (ia_type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- return _gf_true;
- default:
- return _gf_false;
- }
- return _gf_false;
-}
int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame)
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- call_frame_t *new_frame = NULL;
-
- op_errno = ENOMEM;
- priv = this->private;
- new_frame = copy_frame (frame);
- if (!new_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- local = frame->local;
- new_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_source;
- impunge_sh->impunge_ret_child = ret_child;
- impunge_sh->impunging_entry_mode = entry_mode;
- impunge_local->child_up = memdup (local->child_up,
- sizeof (*local->child_up) *
- priv->child_count);
- if (!impunge_local->child_up)
- goto out;
-
- ret = afr_sh_common_create (impunge_sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- op_errno = 0;
- *impunge_frame = new_frame;
-out:
- if (op_errno && new_frame)
- AFR_STACK_DESTROY (new_frame);
- return -op_errno;
-}
+ loc_t loc = {0,};
-void
-afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
-{
- call_frame_t *impunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = 0;
- mode_t mode = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
- ret = afr_impunge_frame_create (frame, this, sh->source, child_index,
- mode, &impunge_frame);
- if (ret)
- goto out;
- impunge_local = impunge_frame->local;
- loc_copy (&impunge_local->loc, &local->loc);
- sh->impunge_done = impunge_done;
- impunge_local->call_count = 1;
- afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
- postparent);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s",
- local->loc.path, strerror (-ret));
- impunge_done (frame, this, child_index, -1, -ret);
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "create entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int i = 0;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- afr_sh_missing_entries_finish (frame, this);
- return 0;
- }
-
- buf = &sh->buf[sh->source];
- postparent = &sh->parentbufs[sh->source];
-
- type = buf->ia_type;
- if (!afr_valid_ia_type (type)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
- afr_sh_missing_entries_finish (frame, this);
- goto out;
- }
-
- local->call_count = enoent_count;
- for (i = 0; i < priv->child_count; i++) {
- //If !child_up errno will be zero
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_call_entry_impunge_recreate (frame, this, i,
- buf, postparent,
- afr_sh_create_entry_cbk);
- enoent_count--;
- }
- GF_ASSERT (enoent_count == 0);
-out:
- return 0;
-}
+ loc_wipe (&loc);
-void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t ia_type = IA_INVAL;
- int32_t nsources = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- if (op_errno == EIO)
- local->govinda_gOvinda = 1;
- // EIO can happen if finding the fresh parent dir failed
- goto out;
- }
-
- //now No chance for the ia_type to conflict
- ia_type = sh->buf[sh->success_children[0]].ia_type;
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- afr_transaction_type_get (ia_type));
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- op_errno = EIO;
- goto out;
- }
-
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- sh->source = sh->fresh_children[0];
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
-
- if (sh->gfid_sh_success_cbk)
- sh->gfid_sh_success_cbk (frame, this);
- sh->type = sh->buf[sh->source].ia_type;
- sh_missing_entries_create (frame, this);
- return;
-out:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
-static int
-afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent, &sh->lookup_loc);
- call_count = afr_frame_return (frame);
-
- if (call_count)
- goto out;
- op_ret = -1;
- if (!sh->success_count) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
- "reason %s", sh->lookup_loc.path,
- strerror (op_errno));
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
- (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count,
- sh->lookup_loc.path, this->name))) {
- op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
- (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- sh->lookup_loc.path))) {
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
- op_ret = 0;
-
-done:
- sh->lookup_done (frame, this, op_ret, op_errno);
-out:
- return 0;
-}
int
-afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->post_remove_call);
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "purge entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- LOCK (&frame->lock);
- {
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- }
- UNLOCK (&frame->lock);
- }
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- sh->post_remove_call (frame, this);
- return 0;
-}
+ loc_t loc = {0,};
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
-void
-afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- afr_expunge_done_cbk_t expunge_done)
-{
- call_frame_t *expunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int32_t op_errno = 0;
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- local = frame->local;
- sh = &local->self_heal;
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- loc_copy (&expunge_local->loc, &local->loc);
- sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- expunge_done (frame, this, child_index, -1, op_errno);
-}
+ priv = this->private;
+ local = frame->local;
-void
-afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
- int32_t *fresh_children,
- unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (afr_is_child_present (success_children, child_count, i) &&
- !afr_is_child_present (fresh_children, child_count, i)) {
- sh->child_errno[i] = ENOENT;
- GF_ASSERT (sh->xattr[i]);
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->op_failed) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- if (afr_gfid_missing_count (this->name, sh->fresh_children,
- sh->buf, priv->child_count,
- local->loc.path)) {
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req,
- AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- } else {
- //No need to set gfid so goto missing entries lookup done
- //Behave as if you have done the lookup
- afr_sh_remove_stale_lookup_info (sh,
- sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_children_copy (sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
- }
- }
- return 0;
-}
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-gf_boolean_t
-afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
-{
- afr_self_heal_t *sh = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_selfheal_unentrylk (frame, this, inode, dom, name,
+ locked_on);
- sh = &local->self_heal;
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
+ }
+ }
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ loc_wipe (&loc);
- return _gf_false;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
-gf_boolean_t
-afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
+
+int
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {0,};
- sh = &local->self_heal;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_children, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk,
+ dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
- return _gf_false;
-}
+ loc_wipe (&loc);
-void
-afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
- gf_boolean_t purge_condition (afr_local_t *local,
- afr_private_t *priv,
- int child))
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (purge_condition (local, priv, i))
- call_count++;
- }
-
- if (call_count == 0) {
- sh->post_remove_call (frame, this);
- goto out;
- }
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!purge_condition (local, priv, i))
- continue;
- gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
- "on %d", local->loc.path, i);
- afr_sh_call_entry_expunge_remove (frame, this,
- (long) i, &sh->buf[i],
- afr_sh_remove_entry_cbk);
- }
-out:
- return;
+ return 0;
}
-void
-afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- sh->post_remove_call = afr_sh_missing_entries_finish;
- afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
-}
-
-void
-afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ idx = afr_index_for_transaction_type (type);
- sh->post_remove_call = afr_sh_purge_stale_entries_done;
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
- for (i = 0; i < priv->child_count; i++) {
- if (afr_is_child_present (sh->fresh_children,
- priv->child_count, i))
- continue;
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
+ }
+ }
- if ((!local->child_up[i]) || sh->child_errno[i] != 0)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
- GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
- uuid_is_null (sh->buf[i].ia_gfid));
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
+ }
- if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
- (uuid_compare (sh->buf[i].ia_gfid,
- sh->entrybuf.ia_gfid)))
- continue;
-
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
-
- }
- afr_sh_purge_entry_common (frame, this,
- afr_sh_purge_stale_entry_condition);
+ return _gf_false;
}
-void
-afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
- struct iatt *save,
- unsigned int child_count)
-{
- int i = 0;
- int child = 0;
- gf_boolean_t saved = _gf_false;
-
- GF_ASSERT (save);
- //if iatt buf with gfid exists sets it
- for (i = 0; i < child_count; i++) {
- child = children[i];
- if (child == -1)
- break;
- *save = bufs[child];
- saved = _gf_true;
- if (!uuid_is_null (save->ia_gfid))
- break;
- }
- GF_ASSERT (saved);
-}
-void
-afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
- unsigned int child_count)
+gf_boolean_t
+afr_is_data_set (xlator_t *this, dict_t *xdata)
{
- afr_children_intersection_get (sh->success_children,
- sh->fresh_parent_dirs,
- sh->sources, child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, child_count);
- memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
+ return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION);
}
-void
-afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+gf_boolean_t
+afr_is_metadata_set (xlator_t *this, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t fresh_child_enoents = 0;
- int32_t fresh_parent_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto fail;
- afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
- fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
- priv->child_count);
- //we need the enoent count of the subvols present in fresh_parent_dirs
- fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
- sh->child_errno,
- priv->child_count, ENOENT);
- if (fresh_child_enoents == fresh_parent_count) {
- gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
- local->loc.path);
- afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
- afr_sh_purge_entry (frame, this);
- } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
- priv->child_count, local->loc.path,
- this->name)) {
- afr_sh_save_child_iatts_from_policy (sh->fresh_children,
- sh->buf, &sh->entrybuf,
- priv->child_count);
- afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
- sh->fresh_children,
- priv->child_count);
- afr_sh_purge_stale_entry (frame, this);
- } else {
- op_errno = EIO;
- local->govinda_gOvinda = 1;
- goto fail;
- }
-
- return;
-
-fail:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION);
}
-static void
-afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+gf_boolean_t
+afr_is_entry_set (xlator_t *this, dict_t *xdata)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int enoent_count = 0;
- int nsources = 0;
- int source = -1;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- /* If We can't find a fresh parent directory here,
- * we wont know which subvol is correct without finding a parent dir
- * upwards which has correct xattrs, for that we may have to
- * do lookups till root, we dont wanna do that,
- * instead make sure that if there are conflicting gfid
- * parent dirs, self-heal thus lookup is failed with EIO.
- * if there are missing entries we dont know whether to delete or
- * create so fail with EIO,
- * If there are conflicting xattr fail with EIO.
- */
- if (op_ret < 0)
- goto out;
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count > 0) {
- gf_log (this->name, GF_LOG_ERROR, "Parent dir missing for %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
- goto out;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
- goto out;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
- if (source == -1) {
- GF_ASSERT (0);
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- goto out;
- }
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_parent_dirs, priv->child_count);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_done, NULL, 0);
- return;
-
-out:
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION);
}
+
void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+afr_inode_link (inode_t *inode, struct iatt *iatt)
{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
- memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
- sh->child_errno[i] = 0;
- }
- memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
- sh->success_count = 0;
- afr_reset_children (sh->success_children, child_count);
- afr_reset_children (sh->fresh_children, child_count);
- afr_reset_xattr (sh->xattr, child_count);
- loc_wipe (&sh->lookup_loc);
-}
+ inode_t *linked_inode = NULL;
-/* afr self-heal state will be lost if this call is made
- * please check the afr_sh_common_reset that is called in this function
- */
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
- int32_t flags)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- afr_xattr_req_prepare (this, xattr_req, loc->path);
- if (gfid) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s with gfid: %s",
- loc->path, uuid_utoa (gfid));
- GF_ASSERT (!uuid_is_null (gfid));
- afr_set_dict_gfid (xattr_req, gfid);
- }
- }
-
- afr_sh_common_reset (sh, priv->child_count);
- sh->lookup_done = lookup_done;
- loc_copy (&sh->lookup_loc, loc);
- sh->lookup_flags = flags;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s on subvolume %s",
- loc->path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame,
- afr_sh_common_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, xattr_req);
-
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
+ linked_inode = inode_link (inode, NULL, NULL, iatt);
+
+ uuid_copy (inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
}
+/*
+ * Inspect the unlocked lookup replies for @gfid and decide which kinds of
+ * heal need a locked re-inspection. A flag is set to TRUE when a pending
+ * xattr is set or the replies disagree (uid/gid/mode -> metadata, size of
+ * a regular file -> data); flags are never cleared here, so the caller must
+ * pass them in as FALSE. Returns 0, the discover error, -EIO on a file
+ * type mismatch, or -ENOTCONN when fewer than two replies are usable.
+ */
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &sh->parent_loc,
- afr_sh_find_fresh_parents,
- NULL, AFR_LOOKUP_FAIL_CONFLICTS);
- }
-
- return 0;
-}
+afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, uuid_t gfid,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {0, };
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (afr_is_data_set (this, replies[i].xdata))
+ *data_selfheal = _gf_true;
+
+ if (afr_is_metadata_set (this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
-int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req, AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
+ if (afr_is_entry_set (this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ valid_cnt ++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ continue;
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ if (!IA_EQUAL (first, replies[i].poststat, type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_type,
+ (int) replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+ return -EIO;
+ }
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+ if (!IA_EQUAL (first, replies[i].poststat, uid)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "UID mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_uid,
+ (int) replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- afr_set_lock_number (frame, this);
+ *metadata_selfheal = _gf_true;
+ }
- int_lock->lk_basename = base_name;
- int_lock->lk_loc = loc;
- int_lock->lock_cbk = lock_cbk;
+ if (!IA_EQUAL (first, replies[i].poststat, gid)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "GID mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_gid,
+ (int) replies[i].poststat.ia_gid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- afr_nonblocking_entrylk (frame, this);
+ *metadata_selfheal = _gf_true;
+ }
- return 0;
-}
+ if (!IA_EQUAL (first, replies[i].poststat, prot)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "MODE mismatch %d vs %d on %s for gfid:%s",
+ (int) st_mode_from_ia (first.ia_prot, 0),
+ (int) st_mode_from_ia (replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
-static int
-afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ *metadata_selfheal = _gf_true;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL (first, replies[i].poststat, size)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "SIZE mismatch %lld vs %lld on %s for gfid:%s",
+ (long long) first.ia_size,
+ (long long) replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ *data_selfheal = _gf_true;
+ }
+ }
- GF_ASSERT (local->loc.parent);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ if (valid_cnt > 0)
+ afr_inode_link (inode, &first);
- afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
- lock_cbk);
- return 0;
-}
+ if (valid_cnt < 2)
+ return -ENOTCONN;
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
- return 0;
+ return 0;
}
-static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
- return 0;
-}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
-
- priv = this->private;
-
- sh = &l->self_heal;
-
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
- if (!lc)
- goto out;
-
- shc = &lc->self_heal;
-
- shc->unwind = sh->unwind;
- shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
- shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
- shc->do_gfid_self_heal = sh->do_gfid_self_heal;
- shc->do_data_self_heal = sh->do_data_self_heal;
- shc->do_metadata_self_heal = sh->do_metadata_self_heal;
- shc->do_entry_self_heal = sh->do_entry_self_heal;
- shc->forced_merge = sh->forced_merge;
- shc->background = sh->background;
- shc->type = sh->type;
-
- uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
-
- lc->child_up = memdup (l->child_up,
- sizeof (*lc->child_up) * priv->child_count);
- if (l->xattr_req)
- lc->xattr_req = dict_ref (l->xattr_req);
-
- if (l->cont.lookup.inode)
- lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
- if (l->cont.lookup.xattr)
- lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
-
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
-out:
- return lc;
-}
+ table = this->itable;
+ if (!table)
+ return NULL;
-int
-afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
-
- priv = this->private;
- local = bgsh_frame->local;
- sh = &local->self_heal;
-
- if (local->govinda_gOvinda)
- split_brain = _gf_true;
-
- afr_set_split_brain (this, sh->inode, split_brain);
-
- afr_self_heal_type_str_get (sh, sh_type_str,
- sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_ERROR, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
- }
-
- FRAME_SU_UNDO (bgsh_frame, afr_local_t);
-
- if (!sh->unwound && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
- }
-
- if (sh->background) {
- LOCK (&priv->lock);
- {
- priv->background_self_heals_started--;
- }
- UNLOCK (&priv->lock);
- }
-
- AFR_STACK_DESTROY (bgsh_frame);
-
- return 0;
-}
+ inode = inode_find (table, gfid);
+ if (inode)
+ return inode;
-int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int ret = 0;
- afr_self_heal_t *orig_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
-
- local = frame->local;
- orig_sh = &local->self_heal;
- priv = this->private;
-
- GF_ASSERT (local->loc.path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->self_heal.do_metadata_self_heal,
- local->self_heal.do_data_self_heal,
- local->self_heal.do_entry_self_heal);
-
- op_errno = ENOMEM;
- sh_frame = copy_frame (frame);
- if (!sh_frame)
- goto out;
- afr_set_lk_owner (sh_frame, this);
- afr_set_low_priority (sh_frame);
-
- sh_local = afr_local_copy (local, this);
- if (!sh_local)
- goto out;
- sh_frame->local = sh_local;
- sh = &sh_local->self_heal;
-
- sh->inode = inode_ref (inode);
-
- sh->orig_frame = frame;
-
- sh->completion_cbk = afr_self_heal_completion_cbk;
-
- sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
- gf_afr_mt_char);
- if (!sh->success)
- goto out;
- sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
- gf_afr_mt_int);
- if (!sh->sources)
- goto out;
- sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
- priv->child_count,
- gf_afr_mt_int);
- if (!sh->locked_nodes)
- goto out;
-
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->pending_matrix)
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->pending_matrix[i])
- goto out;
- }
-
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->delta_matrix)
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->delta_matrix)
- goto out;
- }
- sh->fresh_parent_dirs = afr_children_create (priv->child_count);
- if (!sh->fresh_parent_dirs)
- goto out;
- ret = afr_sh_common_create (sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
-
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- < priv->background_self_heal_count) {
- priv->background_self_heals_started++;
-
-
- } else {
- local->self_heal.background = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- }
-
- FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->do_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
-
- afr_sh_missing_entries_done (sh_frame, this);
- }
- op_errno = 0;
+ inode = inode_new (table);
+ if (!inode)
+ return NULL;
-out:
- if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno);
- }
- return 0;
+ uuid_copy (inode->gfid, gfid);
+
+ return inode;
}
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size)
+
+call_frame_t *
+afr_frame_create (xlator_t *this)
{
- GF_ASSERT (str && (size > strlen (" missing-entry gfid "
- "meta-data data entry")));
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ pid_t pid = -1;
- if (self_heal_p->do_metadata_self_heal) {
- snprintf (str, size, " meta-data");
- }
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return NULL;
- if (self_heal_p->do_data_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " data");
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
- if (self_heal_p->do_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " entry");
- }
+ syncopctx_setfspid (&pid);
- if (self_heal_p->do_missing_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str),
- " missing-entry");
- }
+ frame->root->pid = pid;
- if (self_heal_p->do_gfid_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " gfid");
- }
-}
+ afr_set_lk_owner (frame, this, frame->root);
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type)
-{
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_DATA;
- break;
- case AFR_METADATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_METADATA;
- break;
- case AFR_ENTRY_TRANSACTION:
- sh_type = AFR_SELF_HEAL_ENTRY;
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- GF_ASSERT (0);
- break;
- }
- return sh_type;
+ return frame;
}
+
+/*
+ * This is the entry point for healing a given GFID
+ */
+
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+afr_selfheal (xlator_t *this, uuid_t gfid)
{
- int ret = -1;
-
- if (!child) {
- goto out;
- }
+ inode_t *inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
+ inode = afr_inode_find (this, gfid);
+ if (!inode)
+ goto out;
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
+ frame = afr_frame_create (this);
+ if (!frame)
+ goto out;
- if (!child->path) {
- goto out;
- }
+ ret = afr_selfheal_unlocked_inspect (frame, this, inode, gfid,
+ &data_selfheal,
+ &metadata_selfheal,
+ &entry_selfheal);
+ if (ret)
+ goto out;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ if (data_selfheal)
+ afr_selfheal_data (frame, this, inode);
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ if (metadata_selfheal)
+ afr_selfheal_metadata (frame, this, inode);
- if (!child->inode) {
- ret = -1;
- goto out;
- }
+ if (entry_selfheal)
+ afr_selfheal_entry (frame, this, inode);
- ret = 0;
+ inode_forget (inode, 1);
out:
- if (ret == -1)
- loc_wipe (child);
+ if (inode)
+ inode_unref (inode);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
deleted file mode 100644
index bc0dcd78c..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_COMMON_H__
-#define __AFR_SELF_HEAL_COMMON_H__
-
-#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
- AFR_LOOKUP_FAIL_CONFLICTS = 1,
- AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
-} afr_lookup_flags_t;
-
-int
-afr_sh_select_source (int sources[], int child_count);
-
-int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
-afr_sh_source_count (int sources[], int child_count);
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
-
-int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count);
-
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type);
-
-int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name);
-
-int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
-
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size);
-
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type);
-
-int
-afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type);
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
-
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc);
-
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
- int32_t flags);
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf);
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk);
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent);
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk);
-afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
-typedef int
-(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
-int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame);
-#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 216017cbb..c0548d995 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,1350 +1,635 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-
-extern int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk);
-
-int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
-
-int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "flush failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
- gf_log (this->name, GF_LOG_DEBUG,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
-
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_finish (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- GF_ASSERT (0);
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
+#include "byte-order.h"
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+};
- if (!--call_count)
- break;
- }
- return 0;
-}
-
-int
-afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
+static int
+__checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, uint32_t weak, uint8_t *strong,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ int i = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
- GF_ASSERT (sh->source == child_index);
- if (op_ret != -1)
- sh->buf[child_index] = *buf;
- afr_sh_data_setattr (frame, this);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (strong)
+ memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
- return 0;
+ syncbarrier_wake (&local->barrier);
+ return 0;
}
-/*
- * If there are any writes after the self-heal is triggered then the
- * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
- * stat on the source and then set the [am]times
- */
-int
-afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->fstat,
- sh->healing_fd);
- return 0;
-}
-
-//Fun fact, lock_cbk is being used for both lock & unlock
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
- GF_ASSERT (sh->data_lock_held);
-
- sh->data_lock_held = _gf_false;
- int_lock->lock_cbk = lock_cbk;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
+static int
+attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing data selfheal of %s", local->loc.path);
-
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
+ int i = (long) cookie;
+ afr_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref (xdata);
- local = frame->local;
- sh = &local->self_heal;
+ syncbarrier_wake (&local->barrier);
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing failed data selfheal of %s", local->loc.path);
-
- sh->op_failed = 1;
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
- return 0;
+ return 0;
}
-int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local = frame->local;
- sh = &local->self_heal;
- if (NULL == sh->old_loop_frame) {
- GF_ASSERT (sh->data_lock_held);
- afr_sh_data_fxattrop (frame, this,
- afr_post_sh_data_fxattrop_cbk);
- goto out;
- }
-
- afr_sh_data_lock (frame, this, 0, 0,
- afr_post_sh_big_lock_success,
- afr_post_sh_big_lock_failure);
- }
-out:
- return 0;
-}
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int source,
+ unsigned char *healed_sinks,
+ off_t offset, size_t size)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
- gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %"PRIu64,
- frame->root->lk_owner);
- afr_sh_print_pending_matrix (sh->delta_matrix, this);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- GF_ASSERT (call_count);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ int i = 0;
- return 0;
-}
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0 (priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
+ }
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
-{
- int i = 0;
+ AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
+ offset, size, NULL);
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
- }
+ if (!local->replies[source].valid || local->replies[source].op_ret != 0)
+ return _gf_false;
- i++;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+ if (memcmp (local->replies[source].checksum,
+ local->replies[i].checksum,
+ MD5_DIGEST_LENGTH))
+ return _gf_false;
+ }
- return NULL;
+ return _gf_true;
}
static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
-{
- int i;
- int ret = 0;
-
- for (i = 0; i < child_count; i++) {
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
- }
- }
-
- return ret;
-}
-
-
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
-
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
-
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size *
- this->ctx->page_size))) {
-
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
-
- algo = sh_algo_from_name (this, "full");
-
- } else {
- algo = sh_algo_from_name (this, "diff");
- }
- }
-
- return algo;
+__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                int source, unsigned char *healed_sinks,
+                                off_t offset, size_t size,
+                                struct afr_reply *replies)
+{
+        /*
+         * Read `size` bytes at `offset` from the source subvolume and
+         * write them to every subvolume still flagged in healed_sinks[].
+         *
+         * A sink whose write does not transfer the whole buffer has its
+         * healed_sinks[] flag cleared so the caller does not treat it as
+         * healed.
+         *
+         * Returns the syncop_readv() result unchanged when it is <= 0;
+         * otherwise the result of the last syncop_writev() issued (or the
+         * number of bytes read if every sink was skipped).
+         */
+        struct iovec  *iovec  = NULL;
+        int            count  = 0;
+        struct iobref *iobref = NULL;
+        int            ret    = 0;
+        int            i      = 0;
+        afr_private_t *priv   = NULL;
+
+        priv = this->private;
+
+        ret = syncop_readv (priv->children[source], fd, size, offset, 0,
+                            &iovec, &count, &iobref);
+        if (ret <= 0)
+                return ret;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!healed_sinks[i])
+                        continue;
+
+                /*
+                 * TODO: Use fiemap() and discard() to heal holes
+                 * in the future.
+                 *
+                 * For now,
+                 *
+                 * - if the source had any holes at all,
+                 * AND
+                 * - if we are writing past the original file size
+                 *   of the sink
+                 * AND
+                 * - is NOT the last block of the source file. if
+                 *   the block contains EOF, it has to be written
+                 *   in order to set the file size even if the
+                 *   last block is 0-filled.
+                 * AND
+                 * - if the read buffer is filled with only 0's
+                 *
+                 * then, skip writing to this sink. We don't depend
+                 * on the write to happen to update the size as we
+                 * have performed an ftruncate() upfront anyways.
+                 */
+#define is_last_block(o,b,s) ((s >= o) && (s <= (o + b)))
+                if (HAS_HOLES ((&replies[source].poststat)) &&
+                    offset >= replies[i].poststat.ia_size &&
+                    !is_last_block (offset, size,
+                                    replies[source].poststat.ia_size) &&
+                    (iov_0filled (iovec, count) == 0))
+                        continue;
+
+                ret = syncop_writev (priv->children[i], fd, iovec, count,
+                                     offset, iobref, 0);
+                if (ret != iov_length (iovec, count)) {
+                        /* write() failed on this sink. unset the corresponding
+                           member in sinks[] (which is healed_sinks[] in the
+                           caller) so that this server does NOT get considered
+                           as successfully healed.
+                        */
+                        healed_sinks[i] = 0;
+                }
+        }
+
+        /* The iovec array handed back through &iovec belongs to the caller;
+           iobref_unref() only drops the data buffers, so release the array
+           itself here to avoid leaking it on every healed block.
+        */
+        if (iovec)
+                GF_FREE (iovec);
+        if (iobref)
+                iobref_unref (iobref);
+
+        return ret;
 }
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct afr_sh_algorithm *sh_algo = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->algo_completion_cbk = afr_sh_data_erase_pending;
- sh->algo_abort_cbk = afr_sh_data_fail;
-
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh_algo->fn (frame, this);
-
- return 0;
+/*
+ * Heal a single block [offset, offset + size) of the file behind fd.
+ *
+ * The region is inode-locked for the duration of the operation.  If fewer
+ * locks were obtained than there are sinks, -ENOTCONN is returned.  For the
+ * "diff" heal type the block is skipped (return 0) when the checksums
+ * already match; otherwise the block is copied and the result of
+ * __afr_selfheal_data_read_write() is returned.  The lock is always
+ * released before returning.
+ */
+static int
+afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                         int source, unsigned char *healed_sinks, off_t offset,
+                         size_t size, int type, struct afr_reply *replies)
+{
+        afr_private_t *priv      = this->private;
+        int            nsinks    = AFR_COUNT (healed_sinks, priv->child_count);
+        unsigned char *locked_on = alloca0 (priv->child_count);
+        int            ret       = -1;
+
+        ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+                                    offset, size, locked_on);
+
+        if (ret < nsinks) {
+                /* could not lock enough subvolumes to heal safely */
+                ret = -ENOTCONN;
+        } else if (type == AFR_SELFHEAL_DATA_DIFF &&
+                   __afr_selfheal_data_checksums_match (frame, this, fd,
+                                                        source, healed_sinks,
+                                                        offset, size)) {
+                /* block is already identical; nothing to copy */
+                ret = 0;
+        } else {
+                ret = __afr_selfheal_data_read_write (frame, this, fd, source,
+                                                      healed_sinks, offset,
+                                                      size, replies);
+        }
+
+        afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
+                                offset, size, locked_on);
+        return ret;
 }
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_INFO,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_data_sync_prepare (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
+ priv = this->private;
- sources = sh->sources;
- call_count = sh->active_sinks;
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, fsync, fd, 0, NULL);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
- sh->source = afr_sh_select_source (sh->sources, priv->child_count);
- if (sh->source < 0) {
- ret = -1;
- goto out;
- }
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-out:
- return ret;
-}
-
-int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %"PRIu64,
- frame->root->lk_owner);
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to "
- "resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- source = sh->source;
- sh->block_size = this->ctx->page_size;
- sh->file_size = sh->buf[source].ia_size;
+ loc_t loc = {0, };
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME), NULL);
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- if (sh->background && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
- sh->unwound = _gf_true;
- }
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ loc_wipe (&loc);
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[sh->source]->name,
- sh->active_sinks);
- afr_sh_data_trim_sinks (frame, this);
-
- return 0;
+ return 0;
}
-static void
-afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)
+static int
+afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
- int i = 0;
- GF_ASSERT (child_count > 0);
- if (pending_matrix) {
- for (i = 0; i < child_count; i++) {
- if (pending_matrix[i])
- GF_FREE (pending_matrix[i]);
- }
- GF_FREE (pending_matrix);
- }
-}
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int i = 0;
-static int32_t**
-afr_create_pending_matrix (int32_t child_count)
-{
- gf_boolean_t cleanup = _gf_false;
- int32_t **pending_matrix = NULL;
- int i = 0;
-
- GF_ASSERT (child_count > 0);
-
- pending_matrix = GF_CALLOC (sizeof (*pending_matrix), child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix)
- goto out;
- for (i = 0; i < child_count; i++) {
- pending_matrix[i] = GF_CALLOC (sizeof (**pending_matrix),
- child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix[i]) {
- cleanup = _gf_true;
- goto out;
+ if (priv->data_self_heal_algorithm == NULL) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
+ type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
+ } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) {
+ type = AFR_SELFHEAL_DATA_DIFF;
}
-out:
- if (_gf_true == cleanup) {
- afr_destroy_pending_matrix (pending_matrix, child_count);
- pending_matrix = NULL;
- }
- return pending_matrix;
+ return type;
}
-int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type)
-{
- afr_private_t *priv = NULL;
- int read_child = -1;
- int ret = -1;
- int32_t **pending_matrix = NULL;
- int32_t *sources = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int32_t nsources = 0;
- int32_t prev_read_child = -1;
- int32_t config_read_child = -1;
-
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
-
- pending_matrix = afr_create_pending_matrix (priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
- sources = GF_CALLOC (sizeof (*sources), priv->child_count,
- gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
-
- nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type);
- if (nsources < 0) {
- ret = -1;
- goto out;
- }
+static int
+afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ off_t off = 0;
+ size_t block = 128 * 1024;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+
+ priv = this->private;
+
+ sinks_str = alloca0 (priv->child_count * 8);
+ p = sinks_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ p += sprintf (p, "%d ", i);
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "performing data selfheal on %s. "
+ "source=%d sinks=%s",
+ uuid_utoa (fd->inode->gfid), source, sinks_str);
+
+ type = afr_data_self_heal_type_get (priv, healed_sinks, source,
+ replies);
+
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ ret = afr_selfheal_data_block (iter_frame, this, fd, source,
+ healed_sinks, off, block, type,
+ replies);
+ if (ret < 0)
+ goto out;
+
+ AFR_STACK_RESET (iter_frame);
+ }
+
+ afr_selfheal_data_restore_time (frame, this, fd->inode, source,
+ healed_sinks, replies);
+
+ ret = afr_selfheal_data_fsync (frame, this, fd, healed_sinks);
- prev_read_child = local->read_child_index;
- config_read_child = priv->read_child;
- read_child = afr_select_read_child_from_policy (success_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- sources);
- ret = 0;
- local->cont.lookup.sources = sources;
out:
- afr_destroy_pending_matrix (pending_matrix, priv->child_count);
- if (-1 == ret) {
- if (sources)
- GF_FREE (sources);
- }
- gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d", read_child);
- return read_child;
-}
-
-int
-afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fstat of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_fix (frame, this);
- }
-
- return 0;
+ if (iter_frame)
+ AFR_STACK_DESTROY (iter_frame);
+ return ret;
}
-int
-afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, unsigned char *healed_sinks,
+ struct afr_reply *replies, uint64_t size)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
- local->call_count = call_count;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *larger_sinks = 0;
+ int i = 0;
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
+ local = frame->local;
+ priv = this->private;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-void
-afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fxattrop of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
-}
+ larger_sinks = alloca0 (priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && replies[i].poststat.ia_size > size)
+ larger_sinks[i] = 1;
+ }
-int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- int call_count = -1;
- int ret = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- local = frame->local;
- sh = &local->self_heal;
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- (void) afr_build_sources (this, sh->xattr, NULL,
- sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret)
- afr_sh_data_fail (frame, this);
- else
- afr_sh_set_timestamps (frame, this);
- }
+ AFR_ONLIST (larger_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- int call_count = -1;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_sh_data_fstat (frame, this);
- }
-
- return 0;
+/*
+ * Choose the subvolume to heal from.
+ *
+ * Returns -EIO (split brain) when no subvolume is marked as a source, or
+ * when the number of healed sinks equals the number of locked subvolumes.
+ * Otherwise returns the index of the source holding the largest file (the
+ * highest such index on a tie).  Every other source whose file is smaller
+ * is cleared in sources[] and set in sinks[].
+ *
+ * Sources are only expected to differ in size if data was directly
+ * modified in the backend.
+ */
+static int
+__afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies)
+{
+        afr_private_t *priv     = this->private;
+        int            nlocked  = AFR_COUNT (locked_on, priv->child_count);
+        int            nsources = AFR_COUNT (sources, priv->child_count);
+        int            nhealed  = AFR_COUNT (healed_sinks, priv->child_count);
+        uint64_t       largest  = 0;
+        int            chosen   = -1;
+        int            idx      = 0;
+
+        if (nsources == 0 || nlocked == nhealed) {
+                /* split brain */
+                return -EIO;
+        }
+
+        /* first pass: find the biggest file among the sources */
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (sources[idx] &&
+                    replies[idx].poststat.ia_size >= largest) {
+                        largest = replies[idx].poststat.ia_size;
+                        chosen = idx;
+                }
+        }
+
+        /* second pass: any source with a smaller file becomes a sink */
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (sources[idx] &&
+                    replies[idx].poststat.ia_size < largest) {
+                        sources[idx] = 0;
+                        sinks[idx] = 1;
+                }
+        }
+
+        return chosen;
+}
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk)
+/*
+ * __afr_selfheal_data_prepare:
+ *
+ * This function inspects the on-disk xattrs and determines which subvols
+ * are sources and sinks.
+ *
+ * The return value is the index of the subvolume to be used as the source
+ * for self-healing, or an error code (e.g. -EIO) if no healing is
+ * necessary/split brain.
+ */
+static int
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int32_t *zero_pending = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
+ priv = this->private;
- afr_reset_xattr (sh->xattr, priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, fxattrop_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
-
- if (!--call_count)
- break;
- }
- }
+ ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ replies);
+ if (ret)
+ return ret;
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
- }
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_DATA_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
- return 0;
-}
+ source = __afr_selfheal_data_finalize_source (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies);
+ if (source < 0)
+ return -EIO;
-int
-afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
- local = frame->local;
- sh = &local->self_heal;
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_sh_data_fxattrop_cbk);
- return 0;
+ return source;
}
-int
-afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_failure_handler (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
- "done for %s by %"PRIu64". Proceding to self-heal",
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_success_handler (frame, this);
- }
- return 0;
+/*
+ * __afr_selfheal_data:
+ *
+ * Perform one data self-heal pass on @fd.
+ *
+ * Under an inodelk in the this->name domain (offset 0, length 0) it
+ * determines sources/sinks via __afr_selfheal_data_prepare(), truncates
+ * the healed sinks to the source's size and takes the compatibility lock.
+ * The (0, 0) lock is then released, the data is healed with
+ * afr_selfheal_data_do() and, if that returns 0, the pending markers are
+ * undone with afr_selfheal_undo_pending().
+ *
+ * Returns -ENOTCONN if fewer than two subvolumes could be locked, a
+ * negative value from the prepare/truncate steps, or otherwise the result
+ * of afr_selfheal_data_do() / afr_selfheal_undo_pending().
+ *
+ * NOTE(review): the @locked_on parameter is not referenced in this body.
+ */
+static int
+__afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t compat = _gf_false;
+ unsigned char *compat_lock = NULL;
+
+ priv = this->private;
+
+ /* Per-child flag arrays, one byte per child, from alloca0. */
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+ compat_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ /* The return value is compared against 2 below, i.e. it is treated as
+    the number of subvolumes on which the lock was obtained. */
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_prepare (frame, this, fd, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies);
+ if (ret < 0)
+ goto unlock;
+
+ /* A non-negative result is the index of the chosen source. */
+ source = ret;
+
+ ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
+ locked_replies,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = 0;
+
+ /* Locking from (LLONG_MAX - 2) to (LLONG_MAX - 1) is for
+ compatibility with older self-heal clients which do not
+ hold a lock in the @priv->sh_domain domain to guard
+ against concurrent ongoing self-heals
+ */
+ /* The result of this lock call is not checked; compat only
+    records that the matching unlock must be issued at out:. */
+ afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+ LLONG_MAX - 2, 1, compat_lock);
+ compat = _gf_true;
+ }
+unlock:
+ /* Reached on every path, including the ret < 2 case above. */
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks,
+ healed_sinks, AFR_DATA_TRANSACTION,
+ locked_replies, data_lock);
+out:
+ if (compat)
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
+ LLONG_MAX - 2, 1, compat_lock);
+ return ret;
 }
-int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
- int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s by %"PRIu64". Proceeding to self-heal",
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_success_handler (frame, this);
- }
-
- return 0;
-}
-int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
+/*
+ * afr_selfheal_data_open:
+ *
+ * Create an fd on @inode and open it through @this with
+ * O_RDWR|O_LARGEFILE, using a loc built from the inode and its gfid.
+ *
+ * Returns the fd (after fd_bind) on success.  Returns NULL if fd_create
+ * fails, or if syncop_open returns non-zero, in which case the fd is
+ * unreferenced before returning.
+ */
+static fd_t *
+afr_selfheal_data_open (xlator_t *this, inode_t *inode)
 {
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
-
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ loc_t loc = {0,};
+ int ret = 0;
+ fd_t *fd = NULL;
- afr_nonblocking_inodelk (frame, this);
+ fd = fd_create (inode, 0);
+ if (!fd)
+ return NULL;
- return 0;
-}
-
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_post_sh_data_fxattrop_cbk);
- return 0;
-}
+ /* loc takes its own inode reference; presumably dropped again by
+    loc_wipe() below -- confirm against loc_wipe. */
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd);
+ if (ret) {
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
+ }
- local = frame->local;
- sh = &local->self_heal;
+ loc_wipe (&loc);
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_set_timestamps (frame, this);
- return 0;
+ return fd;
 }
+/*
+ * afr_selfheal_data:
+ *
+ * Entry point for data self-heal of @inode.  Opens an fd on the inode,
+ * calls afr_selfheal_tryinodelk() in the priv->sh_domain domain and, if
+ * at least two subvolumes were locked, runs __afr_selfheal_data() while
+ * that lock is held.  The lock is released and the fd unreferenced before
+ * returning.
+ *
+ * Returns -EIO if the fd could not be opened, -ENOTCONN if fewer than two
+ * locks were obtained, otherwise the result of __afr_selfheal_data().
+ */
 int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->data_lock_success_handler = success_handler;
- sh->data_lock_failure_handler = failure_handler;
- return afr_sh_data_lock_rec (frame, this, start, len);
-}
-
-int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
 {
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- afr_sh_data_lock (frame, this, 0, 0,
- afr_sh_data_big_lock_success,
- afr_sh_data_fail);
- }
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ fd_t *fd = NULL;
- return 0;
-}
+ priv = this->private;
+ fd = afr_selfheal_data_open (this, inode);
+ if (!fd)
+ return -EIO;
-int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- int call_count = 0;
- fd_t *fd = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(!local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
-
- if (!--call_count)
- break;
- }
+ /* One flag byte per child, zero-filled; filled in by the lock call. */
+ locked_on = alloca0 (priv->child_count);
- return 0;
-}
+ ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __afr_selfheal_data (frame, this, fd, locked_on);
+ }
+unlock:
+ /* Issued on both paths, including when fewer than two locks were
+    obtained. */
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
-int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
- afr_sh_data_open (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
+ /* fd is always non-NULL here: the NULL case returned early above. */
+ if (fd)
+ fd_unref (fd);
- return 0;
+ return ret;
 }
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 5a3302f7f..9605d69f4 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,2194 +1,631 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
-#include "inode.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include "afr-self-heal.h"
#include "byte-order.h"
-
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
- do {\
- _local = _frame->local;\
- _sh = &_local->self_heal;\
- _sh_frame = _sh->sh_frame;\
- _sh_local = _sh_frame->local;\
- _sh_sh = &_sh_local->self_heal;\
- } while (0);
-
-int
-afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_entry_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
-
- afr_sh_entry_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- long i = 0;
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *orig_local = NULL;
- call_frame_t *orig_frame = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
-
- afr_children_add_child (sh->fresh_children, i, priv->child_count);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to erase pending xattrs on %s (%s)",
- local->loc.path, priv->children[i]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->source == -1) {
- //this happens if the forced merge option is set
- read_child = sh->fresh_children[0];
- } else {
- read_child = sh->source;
- }
- afr_inode_set_read_ctx (this, sh->inode, read_child,
- sh->fresh_children);
- orig_frame = sh->orig_frame;
- orig_local = orig_frame->local;
-
- if (sh->source != -1) {
- orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
- }
-
- afr_sh_entry_finish (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
- return 0;
-}
-
+/*
+ * afr_selfheal_entry_delete:
+ *
+ * Remove the entry @name under @dir from subvolume index @child.
+ *
+ * Nothing is removed unless replies[child] is valid and its op_ret is 0.
+ * A directory (per replies[child].poststat.ia_type) is removed with
+ * syncop_rmdir, anything else with syncop_unlink; each removal is logged
+ * at WARNING level first.
+ *
+ * Returns the result of the rmdir/unlink call, or 0 if nothing was done.
+ */
 static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
-}
-
-
-
-static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
+afr_selfheal_entry_delete (call_frame_t *frame, xlator_t *this, inode_t *dir,
+                           const char *name, inode_t *inode, int child,
+                           struct afr_reply *replies)
+{
+        afr_private_t *priv = this->private;
+        xlator_t *brick = priv->children[child];
+        struct afr_reply *reply = &replies[child];
+        loc_t target = {0, };
+        char gfid_str[64];
+        int op_ret = 0;
+
+        /* Address the entry as <parent gfid>/<name>. */
+        target.parent = inode_ref (dir);
+        uuid_copy (target.pargfid, dir->gfid);
+        target.name = name;
+        target.inode = inode_ref (inode);
+
+        /* Without a valid, successful reply there is nothing to expunge. */
+        if (!reply->valid || reply->op_ret != 0)
+                goto done;
+
+        if (reply->poststat.ia_type == IA_IFDIR) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "expunging dir %s/%s (%s) on %s",
+                        uuid_utoa (dir->gfid), name,
+                        uuid_utoa_r (reply->poststat.ia_gfid, gfid_str),
+                        brick->name);
+                op_ret = syncop_rmdir (brick, &target, 1);
+        } else {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "expunging file %s/%s (%s) on %s",
+                        uuid_utoa (dir->gfid), name,
+                        uuid_utoa_r (reply->poststat.ia_gfid, gfid_str),
+                        brick->name);
+                op_ret = syncop_unlink (brick, &target);
+        }
+
+done:
+        loc_wipe (&target);
+
+        return op_ret;
+}
+
+
+/*
+ * afr_selfheal_recreate_entry:
+ *
+ * Recreate the entry @name under @dir on subvolume @dst so that it matches
+ * what subvolume @source reported in replies[source].poststat.
+ *
+ * Whatever @dst currently holds under that name is first removed through
+ * afr_selfheal_entry_delete().  The new entry is then created with the
+ * source's gfid passed as "gfid-req" in xdata:
+ *   - directory: syncop_mkdir
+ *   - symlink:   if a lookup by gfid succeeds on @dst, syncop_link to it;
+ *                otherwise read the link target from @source and
+ *                syncop_symlink it on @dst
+ *   - others:    syncop_mknod, with GLUSTERFS_INTERNAL_FOP_KEY set in xdata
+ *
+ * Returns -ENOMEM if xdata cannot be allocated; otherwise the result of
+ * the last operation attempted.  In the mknod case 1 is returned when the
+ * source has a non-zero size and the newly created entry has size zero.
+ */
+int
+afr_selfheal_recreate_entry (call_frame_t *frame, xlator_t *this, int dst,
+ int source, inode_t *dir, const char *name,
+ inode_t *inode, struct afr_reply *replies)
+{
+ int ret = 0;
+ loc_t loc = {0,};
+ loc_t srcloc = {0,};
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {0,};
+
+ priv = this->private;
+
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
+
+ /* loc addresses the entry by parent gfid + name. */
+ loc.parent = inode_ref (dir);
+ uuid_copy (loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
+
+ ret = afr_selfheal_entry_delete (frame, this, dir, name, inode, dst,
+ replies);
+ if (ret)
+ goto out;
+
+ ret = dict_set_static_bin (xdata, "gfid-req",
+ replies[source].poststat.ia_gfid, 16);
+ if (ret)
+ goto out;
+
+ iatt = &replies[source].poststat;
+
+ /* srcloc addresses the same inode by gfid only (no parent/name). */
+ srcloc.inode = inode_ref (inode);
+ uuid_copy (srcloc.gfid, iatt->ia_gfid);
+
+ mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
+
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir (priv->children[dst], &loc, mode, xdata, 0);
+ break;
+ case IA_IFLNK:
+ ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == 0) {
+ ret = syncop_link (priv->children[dst], &srcloc, &loc);
+ } else {
+ /* NOTE(review): linkname is not released anywhere in this
+    function -- confirm who owns the buffer filled in by
+    syncop_readlink.  Also note that a result of 0 skips
+    the symlink creation and is returned as success. */
+ ret = syncop_readlink (priv->children[source], &srcloc,
+ &linkname, 4096);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink (priv->children[dst], &loc, linkname,
+ xdata, NULL);
+ }
+ break;
+ default:
+ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = syncop_mknod (priv->children[dst], &loc, mode,
+ iatt->ia_rdev, xdata, &newent);
+ if (ret == 0 && iatt->ia_size && !newent.ia_size) {
+ /* New entry created. Mark @dst pending on all sources */
+ ret = 1;
+ }
+ break;
+ }
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = (long) cookie;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setattr on parent directory of %s on subvolume %s failed: %s",
- expunge_local->loc.path,
- priv->children[active_src]->name, strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int32_t valid = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->setattr,
- &expunge_sh->parent_loc,
- &expunge_sh->parentbuf,
- valid);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "expunging directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int type = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
- break;
- case IA_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
-
- return 0;
 out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, -1, EINVAL);
-
- return 0;
+ if (xdata)
+ dict_unref (xdata);
+ loc_wipe (&loc);
+ loc_wipe (&srcloc);
+ return ret;
 }
-int
-afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
-
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+/*
+ * afr_selfheal_newentry_mark:
+ *
+ * For every subvolume flagged in newentry[], set the data-transaction slot
+ * of its changelog row to hton32(1), turn the matrix into a pending xattr
+ * dict via afr_set_pending_dict(), and apply that dict with
+ * afr_selfheal_post_op() on every subvolume flagged in sources[].
+ *
+ * Also copies the gfid from replies[source].poststat into @inode.
+ *
+ * Returns -ENOMEM if the xattr dict cannot be allocated, 0 otherwise; the
+ * results of afr_selfheal_post_op() are not inspected.
+ */
+static int
+afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
 {
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
+ int idx = 0;
- priv = this->private;
- expunge_local = expunge_frame->local;
+ priv = this->private;
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
+ uuid_copy (inode->gfid, replies[source].poststat.ia_gfid);
- return 0;
-}
+ /* NOTE(review): changelog is dereferenced below without a NULL check
+    and is not released anywhere in this function, including on the
+    -ENOMEM return -- confirm whether afr_matrix_create can fail and
+    who owns the matrix. */
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
-int
-afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int need_expunge = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1 && op_errno == ENOENT)
- need_expunge = 1;
- else if (op_ret == -1)
- goto out;
-
- if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
- !uuid_is_null (buf->ia_gfid) &&
- (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
- char uuidbuf1[64];
- char uuidbuf2[64];
- gf_log (this->name, GF_LOG_DEBUG,
- "entry %s found on %s with mismatching gfid (%s/%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
- uuid_utoa_r (buf->ia_gfid, uuidbuf2));
- need_expunge = 1;
- }
-
- if (need_expunge) {
- gf_log (this->name, GF_LOG_INFO,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
-
- if (postparent)
- expunge_sh->parentbuf = *postparent;
-
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
-out:
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
+ /* Flag the data slot for each newly created entry's subvolume. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!newentry[i])
+ continue;
+ changelog[i][idx] = hton32(1);
+ }
+ afr_set_pending_dict (priv, xattr, changelog);
-int
-afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
- char *name = NULL;
- int op_ret = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
- sh->expunge_done = afr_sh_entry_expunge_entry_done;
-
- name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
- expunge_sh->entrybuf = entry->d_stat;
-
- ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc, name);
- if (ret != 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
-
- ret = 0;
-out:
- if (ret == -1)
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
+ /* Apply the pending markers on each source; the per-call result is
+    ignored, so ret stays 0. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ afr_selfheal_post_op (frame, this, inode, i, xattr);
+ }
- return 0;
+ dict_unref (xattr);
+ return ret;
 }
-int
-afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry);
- }
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
-
- return 0;
+static int
+__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *newentry = NULL;
+
+ priv = this->private;
+ newentry = alloca0 (priv->child_count);
+
+ if (!replies[source].valid)
+ return -EIO;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete (frame, this, fd->inode,
+ name, inode, i, replies);
+ } else {
+ if (!uuid_compare (replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
+
+ ret = afr_selfheal_recreate_entry (frame, this, i, source,
+ fd->inode, name, inode,
+ replies);
+ if (ret > 0) {
+ newentry[i] = 1;
+ ret = 0;
+ }
+ }
+ if (ret < 0)
+ break;
+ }
+
+ if (AFR_COUNT (newentry, priv->child_count))
+ afr_selfheal_newentry_mark (frame, this, inode, source, replies,
+ sources, newentry);
+ return ret;
}
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int source = -1;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- sh->offset = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ source = i;
+ break;
+ }
+ }
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
+ return 0;
+ }
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
- if (sh->op_failed) {
- goto out;
- }
+ if (replies[i].op_errno != ENOENT)
+ continue;
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-out:
- afr_sh_entry_impunge_all (frame, this);
- return 0;
+ ret = afr_selfheal_recreate_entry (frame, this, i, source,
+ fd->inode, name, inode,
+ replies);
+ }
+ return ret;
}
-int
-afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-void
-afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int32_t impunge_ret_child = 0;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
-
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
-}
+ int ret = -1;
-int
-afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "setattr done for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "setattr (%s) on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ if (source < 0)
+ ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode,
+ sources, healed_sinks,
+ locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+ return ret;
}
-int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
- struct iatt stbuf = {0};
- int32_t valid = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to perform xattrop on %s (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
-
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
-
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
- return 0;
+static int
+afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, char *name)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *locked_on = NULL;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+
+ locked_on = alloca0 (priv->child_count);
+
+ replies = alloca0 (priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name,
+ name, locked_on);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name,
+ replies, locked_on);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, name,
+ locked_on);
+ if (inode)
+ inode_unref (inode);
+ return ret;
}
-int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- loc_t *parent_loc = cookie;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- parent_loc->path, strerror (op_errno));
- }
-
- loc_wipe (parent_loc);
-
- GF_FREE (parent_loc);
-
- AFR_STACK_DESTROY (setattr_frame);
- return 0;
-}
-
-int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent)
+static int
+afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int child, int source, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int child_index = 0;
- int32_t *pending_array = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf = {0,};
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- inode->ia_type = stbuf->ia_type;
-
- xattr = dict_new ();
- if (!xattr) {
- ret = -1;
- goto out;
- }
-
- pending_array = (int32_t*) GF_CALLOC (3, sizeof (*pending_array),
- gf_afr_mt_int32_t);
-
- if (!pending_array) {
- ret = -1;
- goto out;
- }
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
-
- ret = dict_set_dynptr (xattr, priv->pending_key[child_index],
- pending_array,
- 3 * sizeof (*pending_array));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- } else {
- pending_array = NULL;
- }
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
-
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- if (!parent_loc) {
- ret = -1;
- goto out;
- }
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
-
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
-out:
- if (xattr)
- dict_unref (xattr);
+ priv = this->private;
+ subvol = priv->children[child];
- if (ret) {
- if (pending_array)
- GF_FREE (pending_array);
+ INIT_LIST_HEAD (&entries.list);
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame)
+ return -ENOMEM;
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- }
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
- return 0;
-}
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR))
+ continue;
-int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), dict);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
+ ret = afr_selfheal_entry_dirent (iter_frame, this, fd,
+ source, sources,
+ healed_sinks,
+ entry->d_name);
+ AFR_STACK_RESET (iter_frame);
+ if (ret)
+ break;
+ }
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
-int
-afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
-
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return 0;
- }
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- dict);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- struct iatt *buf = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- buf = &impunge_local->cont.symlink.buf;
-
- dict = dict_new ();
- if (!dict) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, ENOMEM);
- goto out;
- }
-
- GF_ASSERT (!uuid_is_null (buf->ia_gfid));
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "%s: dict set gfid failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, dict);
-
- if (dict)
- dict_unref (dict);
-out:
- return 0;
+ AFR_STACK_DESTROY (iter_frame);
+ return ret;
}
-
-int
-afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- impunge_sh->linkname);
-
- return 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static int
+afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking symlink %s with wrong target on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ priv = this->private;
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
+ gf_log (this->name, GF_LOG_INFO, "performing entry selfheal on %s",
+ uuid_utoa (fd->inode->gfid));
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && !healed_sinks[i])
+ continue;
+ ret = afr_selfheal_entry_do_subvol (frame, this, fd, i, source,
+ sources, healed_sinks);
+ if (ret)
+ break;
+ }
+ return ret;
}
-int
-afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+static int
+__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- /* symlink doesn't exist on the sink */
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- afr_sh_entry_impunge_symlink (impunge_frame, this,
- child_index, impunge_sh->linkname);
- return 0;
- }
-
-
- /* symlink exists on the sink, so check if targets match */
-
- if (strcmp (linkname, impunge_sh->linkname) == 0) {
- /* targets match, nothing to do */
-
- goto out;
- } else {
- /*
- * Hah! Sneaky wolf in sheep's clothing!
- */
- afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
- child_index);
- return 0;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int locked_count = 0;
+ int sources_count = 0;
+ int sinks_count = 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ priv = this->private;
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ sinks_count = AFR_COUNT (sinks, priv->child_count);
- return 0;
-}
+ if (locked_count == sinks_count || !sources_count) {
+ return -1;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
-int
-afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checking symlink target of %s on %s",
- impunge_local->loc.path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
+ return source;
}
-int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+static int
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->linkname = gf_strdup (linkname);
- afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
+ priv = this->private;
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
-}
-
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent)
-{
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int ret = 0;
- int active_src = 0;
-
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->parentbuf = *postparent;
- active_src = impunge_sh->active_source;
- impunge_local->cont.lookup.buf = *buf;
- afr_update_loc_gfids (&impunge_local->loc, buf, postparent);
-
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- ret = -1;
- break;
- }
-
- return ret;
-}
+ ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ replies);
+ if (ret)
+ return ret;
-gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child, unsigned int child_count)
-{
- int32_t *success_children = NULL;
- gf_boolean_t recreate = _gf_false;
-
- GF_ASSERT (impunge_sh->impunging_entry_mode);
- GF_ASSERT (impunge_sh->child_errno);
- GF_ASSERT (sources);
-
- success_children = impunge_sh->success_children;
- if (sources[child] || (child == impunge_sh->active_source)) {
- GF_ASSERT (afr_is_child_present (success_children,
- child_count, child));
- goto out;
- }
-
- if (IA_ISLNK (impunge_sh->impunging_entry_mode)) {
- recreate = _gf_true;
- goto out;
- }
-
- if (impunge_sh->child_errno[child] == ENOENT)
- recreate = _gf_true;
-out:
- return recreate;
-}
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
-unsigned int
-afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child_count)
-{
- int count = 0;
- int i = 0;
+ source = __afr_selfheal_entry_finalize_source (this, sources, sinks,
+ locked_on, replies);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
- for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sources, i, child_count))
- count++;
- }
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
- return count;
-}
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
-int
-afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- unsigned int recreate_count = 0;
- int i = 0;
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- buf = &impunge_sh->buf[active_src];
- postparent = &impunge_sh->parentbufs[active_src];
-
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- GF_ASSERT (recreate_count);
- impunge_local->call_count = recreate_count;
- for (i = 0; i < priv->child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sh->sources, i,
- priv->child_count)) {
- (void)afr_sh_entry_impunge_create (impunge_frame, this,
- i, buf,
- postparent);
- recreate_count--;
- }
- }
- GF_ASSERT (!recreate_count);
- return 0;
+ return ret;
}
-void
-afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int recreate_count = 0;
- unsigned int gfid_miss_count = 0;
- unsigned int children_up_count = 0;
- uuid_t gfid = {0};
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
-
- if (op_ret < 0)
- goto done;
- if (impunge_sh->child_errno[active_src]) {
- op_ret = -1;
- op_errno = impunge_sh->child_errno[active_src];
- goto done;
- }
-
- gfid_miss_count = afr_gfid_missing_count (this->name,
- impunge_sh->success_children,
- impunge_sh->buf, priv->child_count,
- impunge_local->loc.path);
- children_up_count = afr_up_children_count (impunge_local->child_up,
- priv->child_count);
- if ((gfid_miss_count == children_up_count) &&
- (children_up_count < priv->child_count)) {
- op_ret = -1;
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
- "gfid should not be assigned in this state for %s",
- impunge_local->loc.path);
- goto done;
- }
-
- if (gfid_miss_count) {
- afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
- impunge_sh->success_children,
- priv->child_count);
- if (uuid_is_null (gfid))
- uuid_generate (gfid);
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, gfid,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- } else {
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- if (!recreate_count) {
- op_ret = 0;
- op_errno = 0;
- goto done;
- }
- afr_sh_entry_call_impunge_recreate (impunge_frame, this);
- }
- return;
-done:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
- return;
-}
-int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- int active_src = 0;
- int op_errno = 0;
- int op_ret = -1;
- mode_t entry_mode = 0;
-
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- sh->impunge_done = afr_sh_entry_impunge_entry_done;
-
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- entry->d_name, local->loc.path);
-
- entry_mode = st_mode_from_ia (entry->d_stat.ia_prot,
- entry->d_stat.ia_type);
- ret = afr_impunge_frame_create (frame, this, active_src, active_src,
- entry_mode, &impunge_frame);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
-
- impunge_local = impunge_frame->local;
- ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name);
- if (ret != 0) {
- op_errno = ENOMEM;
- goto out;
- }
-
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS);
-
- op_ret = 0;
+/*
+ * Run one entry self-heal pass on the directory behind 'fd'.
+ *
+ * Under an entry lock in the this->name domain the heal direction is
+ * worked out by __afr_selfheal_entry_prepare(); the lock is then dropped
+ * and the actual heal (afr_selfheal_entry_do) plus the pending-marker
+ * cleanup (afr_selfheal_undo_pending) run without it.
+ *
+ * Returns the result of the last step executed, or -ENOTCONN when the
+ * lock call reports fewer than 2.
+ *
+ * NOTE(review): 'locked_on' is not referenced anywhere in this function.
+ */
+static int
+__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+
+ priv = this->private;
+
+ /* Per-child scratch arrays, one slot per subvolume, taken from the
+    stack via alloca0 so there is nothing to free on any exit path. */
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ /* presumably returns the number of subvolumes locked and marks them
+    in data_lock[] -- TODO confirm against afr_selfheal_entrylk */
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare (frame, this, fd, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies, &source);
+ }
+unlock:
+ /* Reached on both the success and the failure path, so the unlock
+    is always issued before any result is acted upon. */
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_entry_do (frame, this, fd, source, sources,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto out;
+
+ /* Only reached when the heal itself reported success (ret == 0). */
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks,
+ healed_sinks, AFR_ENTRY_TRANSACTION,
+ locked_replies, data_lock);
out:
- if (ret) {
- if (impunge_frame)
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, active_src, op_ret, op_errno);
- }
-
- return 0;
+ return ret;
}
-int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+static fd_t *
+afr_selfheal_data_opendir (xlator_t *this, inode_t *inode)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry);
- }
-
- return 0;
-}
+ loc_t loc = {0,};
+ int ret = 0;
+ fd_t *fd = NULL;
+ fd = fd_create (inode, 0);
+ if (!fd)
+ return NULL;
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ ret = syncop_opendir (this, &loc, fd);
+ if (ret) {
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
+ }
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ loc_wipe (&loc);
- return 0;
+ return fd;
}
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_erase_pending (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_expunge_all (frame, this);
- }
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ priv = this->private;
-int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- int call_count = 0;
+ fd = afr_selfheal_data_opendir (this, inode);
+ if (!fd)
+ return -EIO;
- int source = -1;
- int *sources = NULL;
-
- fd_t *fd = NULL;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = local->self_heal.source;
- sources = local->self_heal.sources;
-
- sh->block_size = 65536; //131072
- sh->offset = 0;
-
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
-
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
-
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd);
- call_count--;
- }
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (source != -1)
- sh->success[source] = 1;
-
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && sh->active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
-
- afr_sh_entry_open (frame, this);
-
- return 0;
-}
-
-
-void
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int nsources = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_entry_finish (frame, this);
- goto out;
- }
-
- if (sh->forced_merge) {
- sh->source = -1;
- goto heal;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_entry_finish (frame, this);
- return;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- sh->source = source;
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- if (sh->source >= 0)
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
-heal:
- afr_sh_entry_sync_prepare (frame, this);
-out:
- return;
-}
-
-int
-afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_entry_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
- "for %s. Proceeding to FOP", local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_entry_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
-
-int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ locked_on = alloca0 (priv->child_count);
+ ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either fewer than two subvols are available, or another
+ selfheal (from another server) is in progress. Skip for
+ now; in either case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- priv = this->private;
- local = frame->local;
+ ret = __afr_selfheal_entry (frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on);
- if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entrylk (frame, this, &local->loc, NULL,
- afr_sh_post_nonblocking_entry_cbk);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
+ if (fd)
+ fd_unref (fd);
- return 0;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index efc841261..b31a33237 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,632 +1,281 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-
-int
-afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
-
- afr_reset_xattr (sh->xattr, priv->child_count);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split-brain detected, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- if (IA_ISREG (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- sh->completion_cbk (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_metadata_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_sh_inode_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
- }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
- afr_sh_metadata_finish (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->success[child_index] = 0;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+#include "byte-order.h"
- return 0;
-}
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE)
int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+afr_selfheal_metadata_do (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
-
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 2 calls per sink - setattr, setxattr
- */
- if (xattr)
- call_count = active_sinks * 2;
- else
- call_count = active_sinks;
-
- local->call_count = call_count;
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[source].ia_uid;
- stbuf.ia_gid = sh->buf[source].ia_gid;
-
- stbuf.ia_type = sh->buf[source].ia_type;
- stbuf.ia_prot = sh->buf[source].ia_prot;
-
- valid = GF_SET_ATTR_MODE |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
- call_count--;
- }
-
- return 0;
+ /*
+  * Copy ownership/mode and extended attributes from subvolume 'source'
+  * onto every subvolume flagged in healed_sinks[].
+  *
+  * A sink on which setattr or setxattr fails has its healed_sinks[]
+  * slot cleared so the caller does not treat it as healed.
+  *
+  * Returns -EIO when the source xattrs cannot be read, 0 otherwise
+  * (per-sink failures are reported only through healed_sinks[]).
+  */
+ int ret = -1;
+ loc_t loc = {0,};
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ /* Address the inode by gfid only; loc takes its own inode reference,
+    handed back through loc_wipe() on every exit path. */
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+
+ gf_log (this->name, GF_LOG_INFO, "performing metadata selfheal on %s",
+         uuid_utoa (inode->gfid));
+
+ ret = syncop_getxattr (priv->children[source], &loc, &xattr, NULL);
+ if (ret < 0) {
+         /* Do not leak a dict that may have been handed back
+            alongside the error. */
+         if (xattr)
+                 dict_unref (xattr);
+         loc_wipe (&loc);
+         return -EIO;
+ }
+
+ /* Strip the keys that must not be replicated to the sinks. */
+ afr_filter_xattrs (xattr);
+ dict_del (xattr, GF_SELINUX_XATTR_KEY);
+
+ for (i = 0; i < priv->child_count; i++) {
+         if (!healed_sinks[i])
+                 continue;
+
+         ret = syncop_setattr (priv->children[i], &loc,
+                               &locked_replies[source].poststat,
+                               AFR_HEAL_ATTR, NULL, NULL);
+         if (ret)
+                 healed_sinks[i] = 0;
+
+         /* Remove the sink's current xattrs before writing the
+            source's set. A failure here is deliberately not fatal:
+            ret is overwritten by the setxattr below. */
+         old_xattr = NULL;
+         ret = syncop_getxattr (priv->children[i], &loc, &old_xattr, 0);
+         if (old_xattr) {
+                 dict_del (old_xattr, GF_SELINUX_XATTR_KEY);
+                 afr_filter_xattrs (old_xattr);
+                 ret = syncop_removexattr (priv->children[i], &loc, "",
+                                           old_xattr);
+                 /* The dict returned by syncop_getxattr is owned by
+                    us (same as 'xattr' above); release it so it is
+                    not leaked once per healed sink. */
+                 dict_unref (old_xattr);
+                 old_xattr = NULL;
+         }
+
+         ret = syncop_setxattr (priv->children[i], &loc, xattr, 0);
+         if (ret)
+                 healed_sinks[i] = 0;
+ }
+
+ loc_wipe (&loc);
+ if (xattr)
+         dict_unref (xattr);
+
+ return 0;
}
-int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+/*
+ * Look for mismatching uid/gid or mode even if xattrs don't say so, and
+ * pick one arbitrarily as winner.
+ */
+
+static int
+__afr_selfheal_metadata_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int i;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
-
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
- afr_sh_metadata_sync (frame, this, xattr);
- }
-
- return 0;
+ /*
+  * Settle on the subvolume to heal from and return its index.
+  *
+  * When every locked subvolume is a sink, or there is no source at all,
+  * this is a metadata split-brain: fail with -EIO unless forced heal is
+  * enabled, in which case the first locked sink is promoted to source.
+  * Any remaining source whose type/uid/gid/prot differs from the first
+  * source is demoted to a sink.
+  */
+ afr_private_t *priv = this->private;
+ struct iatt ref_stat = {0, };
+ int idx = 0;
+ int winner = -1;
+ int nlocked = 0;
+ int nsources = 0;
+ int nsinks = 0;
+
+ nlocked = AFR_COUNT (locked_on, priv->child_count);
+ nsources = AFR_COUNT (sources, priv->child_count);
+ nsinks = AFR_COUNT (sinks, priv->child_count);
+
+ if (nlocked == nsinks || !nsources) {
+         if (!priv->metadata_splitbrain_forced_heal)
+                 return -EIO;
+
+         /* Metadata split brain: promote the first locked sink. */
+         for (idx = 0; idx < priv->child_count; idx++) {
+                 if (!locked_on[idx] || !sinks[idx])
+                         continue;
+                 sources[idx] = 1;
+                 sinks[idx] = 0;
+                 break;
+         }
+ }
+
+ for (idx = 0; idx < priv->child_count; idx++) {
+         if (!sources[idx])
+                 continue;
+
+         if (winner == -1) {
+                 /* First source seen becomes the reference copy. */
+                 winner = idx;
+                 ref_stat = replies[idx].poststat;
+                 continue;
+         }
+
+         if (IA_EQUAL (ref_stat, replies[idx].poststat, type) &&
+             IA_EQUAL (ref_stat, replies[idx].poststat, uid) &&
+             IA_EQUAL (ref_stat, replies[idx].poststat, gid) &&
+             IA_EQUAL (ref_stat, replies[idx].poststat, prot))
+                 continue;
+
+         /* Disagrees with the reference copy: heal it too. */
+         sources[idx] = 0;
+         sinks[idx] = 1;
+ }
+
+ return winner;
}
-int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
-
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL);
-
- return 0;
+ /*
+  * Gather replies from the subvolumes, derive sources[]/sinks[] for a
+  * metadata heal and fill healed_sinks[]. Returns the index of the
+  * chosen source, or a negative errno.
+  */
+ afr_private_t *priv = this->private;
+ int idx = 0;
+ int rc = -1;
+ int chosen = -1;
+
+ rc = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
+                                      replies);
+ if (rc)
+         return rc;
+
+ rc = afr_selfheal_find_direction (frame, this, replies,
+                                   AFR_METADATA_TRANSACTION,
+                                   locked_on, sources, sinks);
+ if (rc)
+         return rc;
+
+ chosen = __afr_selfheal_metadata_finalize_source (this, sources, sinks,
+                                                   locked_on, replies);
+ if (chosen < 0)
+         return -EIO;
+
+ /* Start healed_sinks[] optimistically as the intersection of the
+    subvolumes that need healing (sinks[]) and those that are up
+    (locked_on[]). Failures met while healing clear the matching
+    slot later on.
+ */
+ for (idx = 0; idx < priv->child_count; idx++) {
+         healed_sinks[idx] = sinks[idx] && locked_on[idx];
+ }
+
+ return chosen;
}
-void
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_METADATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- sh->source = source;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
-
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- if ((!IA_ISREG (sh->buf[source].ia_type)) &&
- (!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
-
- afr_sh_metadata_sync_prepare (frame, this);
+ /*
+  * One metadata self-heal pass on 'inode': pick the source while holding
+  * an inode lock in the this->name domain, drop the lock, then heal
+  * (afr_selfheal_metadata_do) and clear the pending markers
+  * (afr_selfheal_undo_pending).
+  *
+  * NOTE(review): 'locked_on' is not referenced anywhere in this function.
+  */
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+
+ priv = this->private;
+
+ /* Per-child scratch arrays from the stack (alloca0); nothing to free. */
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ /* presumably returns the number of subvolumes locked and marks them
+    in data_lock[] -- TODO confirm against afr_selfheal_inodelk */
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_metadata_prepare (frame, this, inode, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies);
+ if (ret < 0)
+ goto unlock;
+
+ /* A non-negative result from prepare is the source index; keep it
+    and reset ret so the post-unlock check below sees success. */
+ source = ret;
+ ret = 0;
+ }
+unlock:
+ /* Same offset/length arguments as the lock call above; reached on
+    both the success and the failure path. */
+ afr_selfheal_uninodelk (frame, this, inode, this->name,
+ LLONG_MAX -1, 0, data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_metadata_do (frame, this, inode, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+
+ /* Only reached when the heal step returned 0. */
+ ret = afr_selfheal_undo_pending (frame, this, inode, sources, sinks,
+ healed_sinks, AFR_METADATA_TRANSACTION,
+ locked_replies, data_lock);
out:
- return;
+ return ret;
}
-int
-afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
- xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking metadata "
- "inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_ERROR, "Metadata self-heal "
- "failed for %s.", local->loc.path);
- afr_sh_metadata_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
- "inodelks done for %s. Proceeding to FOP",
- local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_metadata_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
-
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = this->private;
-
-
- local = frame->local;
-
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
-
- return 0;
+ /*
+  * Entry point for metadata self-heal of 'inode'. Takes a non-blocking
+  * inode lock in the priv->sh_domain domain, runs
+  * __afr_selfheal_metadata() while holding it, and always releases it
+  * before returning. Returns -ENOTCONN when the lock call reports
+  * fewer than 2, otherwise the result of __afr_selfheal_metadata().
+  */
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ /* One slot per subvolume, stack-allocated via alloca0. */
+ locked_on = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either fewer than two subvols are available, or another
+ selfheal (from another server) is in progress. Skip for
+ now; in either case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_metadata (frame, this, inode, locked_on);
+ }
+unlock:
+ /* Reached on both paths, so the unlock is issued even when the
+    try-lock did not reach two subvolumes. */
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
+
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 000000000..ce80b8da3
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,457 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "afr.h"
+#include "afr-self-heal.h"
+
+
+/*
+ * Send a lookup carrying the GFID of replies[gfid_idx] (under the
+ * "gfid-req" key) for <pargfid>/<bname> to every subvolume whose reply
+ * failed (op_ret != 0) with op_errno == ENODATA.
+ *
+ * Returns -ENOMEM if the request dict cannot be created or filled,
+ * otherwise the result of the last lookup issued (0 if none was issued).
+ */
+int
+__afr_selfheal_assign_gfid (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                            uuid_t pargfid, const char *bname, inode_t *inode,
+                            struct afr_reply *replies, int gfid_idx)
+{
+        int             i = 0;
+        afr_private_t  *priv = NULL;
+        dict_t         *xdata = NULL;
+        int             ret = 0;
+        loc_t           loc = {0, };
+
+        priv = this->private;
+
+        uuid_copy (parent->gfid, pargfid);
+
+        xdata = dict_new ();
+        if (!xdata) {
+                return -ENOMEM;
+        }
+
+        ret = dict_set_static_bin (xdata, "gfid-req",
+                                   replies[gfid_idx].poststat.ia_gfid, 16);
+        if (ret) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        loc.parent = inode_ref (parent);
+        loc.inode = inode_ref (inode);
+        uuid_copy (loc.pargfid, pargfid);
+        loc.name = bname;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (replies[i].op_ret == 0 || replies[i].op_errno != ENODATA)
+                        continue;
+
+                ret = syncop_lookup (priv->children[i], &loc, xdata, 0, 0, 0);
+        }
+
+out:
+        /* single cleanup path: loc is zero-initialised, so wiping it is
+           harmless even when it was never populated; the dict is always
+           released through its refcount instead of dict_destroy() */
+        loc_wipe (&loc);
+        dict_unref (xdata);
+
+        return ret;
+}
+
+
+/*
+ * Recreate the entry <pargfid>/<bname> on every subvolume whose valid
+ * reply carries a GFID different from the one in replies[gfid_idx].
+ * The results of afr_selfheal_recreate_entry() are OR-ed together and
+ * returned.
+ */
+int
+__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                             uuid_t pargfid, const char *bname, inode_t *inode,
+                             struct afr_reply *replies, int gfid_idx)
+{
+        afr_private_t  *priv = this->private;
+        int             status = 0;
+        int             child = 0;
+
+        uuid_copy (parent->gfid, pargfid);
+
+        for (child = 0; child < priv->child_count; child++) {
+                /* only valid replies whose GFID differs need recreating */
+                if (replies[child].valid &&
+                    uuid_compare (replies[child].poststat.ia_gfid,
+                                  replies[gfid_idx].poststat.ia_gfid) != 0) {
+                        status |= afr_selfheal_recreate_entry (frame, this,
+                                                               child, gfid_idx,
+                                                               parent, bname,
+                                                               inode, replies);
+                }
+        }
+
+        return status;
+}
+
+
+/*
+ * Remove the entry <pargfid>/<bname> from every subvolume whose reply is
+ * valid and successful (op_ret == 0): syncop_rmdir() when the reply says
+ * the entry is a directory, syncop_unlink() otherwise.  The per-child
+ * results are OR-ed together and returned.
+ */
+int
+__afr_selfheal_name_expunge (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                             uuid_t pargfid, const char *bname, inode_t *inode,
+                             struct afr_reply *replies)
+{
+        afr_private_t  *priv = this->private;
+        loc_t           victim = {0, };
+        char            gfid_str[64];
+        int             status = 0;
+        int             child = 0;
+
+        victim.parent = inode_ref (parent);
+        uuid_copy (victim.pargfid, pargfid);
+        victim.name = bname;
+        victim.inode = inode_ref (inode);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!replies[child].valid || replies[child].op_ret)
+                        continue;
+
+                if (replies[child].poststat.ia_type == IA_IFDIR) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "expunging dir %s/%s (%s) on %s",
+                                uuid_utoa (pargfid), bname,
+                                uuid_utoa_r (replies[child].poststat.ia_gfid,
+                                             gfid_str),
+                                priv->children[child]->name);
+                        status |= syncop_rmdir (priv->children[child],
+                                                &victim, 1);
+                } else {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "expunging file %s/%s (%s) on %s",
+                                uuid_utoa (pargfid), bname,
+                                uuid_utoa_r (replies[child].poststat.ia_gfid,
+                                             gfid_str),
+                                priv->children[child]->name);
+                        status |= syncop_unlink (priv->children[child],
+                                                 &victim);
+                }
+        }
+
+        /* drops the parent/inode references taken above */
+        loc_wipe (&victim);
+
+        return status;
+}
+
+
+/*
+ * Core of the name self-heal for <pargfid>/<bname>, driven by the
+ * per-child lookup replies[].
+ *
+ * Returns 0 when all valid replies agree (same op_ret and same GFID),
+ * the result of __afr_selfheal_name_expunge() when no eligible source
+ * has the entry, -1 on a GFID mismatch between eligible sources or when
+ * no GFID could be picked, and otherwise the result of
+ * __afr_selfheal_name_impunge().
+ */
+int
+__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uuid_t gfid = {0, };
+ int gfid_idx = -1;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ int first_idx = -1;
+ char g1[64],g2[64];
+
+ priv = this->private;
+
+ /* Pass 1: compare every valid reply against the first valid one */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
+
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
+ }
+
+ if (!need_heal)
+ return 0;
+
+ /* Pass 2: does any eligible child (every child when source == -1,
+ otherwise those marked in sources[]) have the entry? */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (!replies[i].op_ret && (source == -1 || sources[i])) {
+ source_is_empty = _gf_false;
+ break;
+ }
+ }
+
+ if (source_is_empty) {
+ return __afr_selfheal_name_expunge (frame, this, parent, pargfid,
+ bname, inode, replies);
+ }
+
+ /* Pass 3: pick the child whose GFID will be used (gfid_idx). The
+ first non-null GFID is taken provisionally; a later eligible child
+ replaces it, unless the current pick is itself eligible and the two
+ GFIDs differ, which is reported as a mismatch. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (uuid_is_null (replies[i].poststat.ia_gfid))
+ continue;
+
+ if (uuid_is_null (gfid)) {
+ uuid_copy (gfid, replies[i].poststat.ia_gfid);
+ gfid_idx = i;
+ continue;
+ }
+
+ if (sources[i] || source == -1) {
+ if (gfid_idx != -1 &&
+ (sources[gfid_idx] || source == -1) &&
+ uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "GFID mismatch for <gfid:%s>/%s "
+ "%s on %s and %s on %s",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ priv->children[i]->name,
+ uuid_utoa_r (replies[gfid_idx].poststat.ia_gfid, g2),
+ priv->children[gfid_idx]->name);
+ return -1;
+ }
+
+ uuid_copy (gfid, replies[i].poststat.ia_gfid);
+ gfid_idx = i;
+ continue;
+ }
+ }
+
+ if (gfid_idx == -1)
+ return -1;
+
+ /* NOTE: the return value of the GFID assignment is not checked */
+ __afr_selfheal_assign_gfid (frame, this, parent, pargfid, bname, inode,
+ replies, gfid_idx);
+
+ return __afr_selfheal_name_impunge (frame, this, parent, pargfid,
+ bname, inode, replies, gfid_idx);
+}
+
+
+/*
+ * Choose the source child for a name heal.
+ *
+ * Returns -1 when the number of locked children equals the number of
+ * sinks or when sources[] has no member set; otherwise the index of the
+ * first child marked in sources[].
+ */
+int
+__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks, unsigned char *locked_on,
+                                     struct afr_reply *replies)
+{
+        afr_private_t  *priv = this->private;
+        int             n_locked = AFR_COUNT (locked_on, priv->child_count);
+        int             n_sources = AFR_COUNT (sources, priv->child_count);
+        int             n_sinks = AFR_COUNT (sinks, priv->child_count);
+        int             child = 0;
+
+        if (n_locked == n_sinks || n_sources == 0)
+                return -1;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (sources[child])
+                        return child;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Gather what a name heal needs: discover the parent on all children,
+ * compute sources[]/sinks[] for an entry transaction, choose a source
+ * (stored in *source_p, possibly negative) and seed healed_sinks[].
+ *
+ * Returns the non-zero result of the discover or find-direction step if
+ * either fails, 0 otherwise.
+ */
+int
+__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,
+                             uuid_t pargfid, unsigned char *locked_on,
+                             unsigned char *sources, unsigned char *sinks,
+                             unsigned char *healed_sinks, struct afr_reply *replies,
+                             int *source_p)
+{
+        afr_private_t  *priv = this->private;
+        int             status = -1;
+        int             child = 0;
+
+        status = afr_selfheal_unlocked_discover (frame, parent, pargfid,
+                                                 replies);
+        if (status)
+                return status;
+
+        status = afr_selfheal_find_direction (frame, this, replies,
+                                              AFR_ENTRY_TRANSACTION,
+                                              locked_on, sources, sinks);
+        if (status)
+                return status;
+
+        /* If the source is < 0 (typically split-brain), we perform a
+           conservative merge of entries rather than erroring out, so a
+           negative value is handed back to the caller as-is. */
+        *source_p = __afr_selfheal_name_finalize_source (this, sources, sinks,
+                                                         locked_on, replies);
+
+        /* Initialize the healed_sinks[] array optimistically to
+           the intersection of to-be-healed (i.e sinks[]) and
+           the list of servers which are up (i.e locked_on[]).
+
+           As we encounter failures in the healing process, we
+           will unmark the respective servers in the healed_sinks[]
+           array.
+        */
+        for (child = 0; child < priv->child_count; child++) {
+                healed_sinks[child] = sinks[child] && locked_on[child];
+        }
+
+        return status;
+}
+
+
+/*
+ * Heal the name <pargfid>/<bname> under an entrylk on @parent.
+ *
+ * Returns -ENOTCONN when afr_selfheal_entrylk() returns less than 2,
+ * the error from __afr_selfheal_name_prepare() if it fails, -ENOMEM when
+ * the lookup yields no inode, and otherwise the result of
+ * __afr_selfheal_name_do().  The entrylk is released on every path.
+ */
+int
+afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+
+ /* per-child scratch arrays, all stack allocated */
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+
+ replies = alloca0 (priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_prepare (frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, replies,
+ &source);
+ if (ret)
+ goto unlock;
+
+ /* replies[] is refilled here with the lookup of bname */
+ inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+ replies, locked_on);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do (frame, this, parent, pargfid, bname,
+ inode, sources, sinks, healed_sinks,
+ source, locked_on, replies);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, parent, this->name, bname,
+ locked_on);
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
+
+/*
+ * Without taking any lock, look up <parent>/<bname> on the children that
+ * are up and set *need_heal to _gf_true if any valid reply differs from
+ * the first valid reply in op_ret or in GFID.  *need_heal is never
+ * cleared here.
+ *
+ * Returns -ENOMEM when the lookup yields no inode, 0 otherwise.
+ */
+int
+afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+                                    inode_t *parent, uuid_t pargfid,
+                                    const char *bname, gf_boolean_t *need_heal)
+{
+        afr_private_t    *priv = this->private;
+        struct afr_reply *replies = NULL;
+        inode_t          *inode = NULL;
+        int               baseline = -1;
+        int               child = 0;
+
+        replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+        inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+                                                 replies, priv->child_up);
+        if (!inode)
+                return -ENOMEM;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!replies[child].valid)
+                        continue;
+
+                /* the first valid reply is the one all others are
+                   compared against */
+                if (baseline == -1) {
+                        baseline = child;
+                        continue;
+                }
+
+                if (replies[child].op_ret != replies[baseline].op_ret ||
+                    uuid_compare (replies[child].poststat.ia_gfid,
+                                  replies[baseline].poststat.ia_gfid))
+                        *need_heal = _gf_true;
+        }
+
+        inode_unref (inode);
+        return 0;
+}
+
+/*
+ * Public entry point: heal the name <pargfid>/<bname> if the children
+ * disagree about it.
+ *
+ * Returns -1 when the parent inode or a frame cannot be obtained, the
+ * error from the unlocked inspection if that fails, 0 when no heal is
+ * needed, and otherwise the result of afr_selfheal_name_do() so that a
+ * failed heal is no longer reported as success.
+ */
+int
+afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname)
+{
+        inode_t        *parent = NULL;
+        call_frame_t   *frame = NULL;
+        int             ret = -1;
+        gf_boolean_t    need_heal = _gf_false;
+
+        parent = afr_inode_find (this, pargfid);
+        if (!parent)
+                goto out;
+
+        frame = afr_frame_create (this);
+        if (!frame)
+                goto out;
+
+        ret = afr_selfheal_name_unlocked_inspect (frame, this, parent, pargfid,
+                                                  bname, &need_heal);
+        if (ret)
+                goto out;
+
+        /* propagate the heal outcome instead of discarding it */
+        if (need_heal)
+                ret = afr_selfheal_name_do (frame, this, parent, pargfid,
+                                            bname);
+out:
+        if (parent)
+                inode_unref (parent);
+        if (frame)
+                AFR_STACK_DESTROY (frame);
+
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index f40c06faa..a1b972ac3 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,58 +1,167 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEAL_H__
-#define __AFR_SELF_HEAL_H__
-#include <sys/stat.h>
+#ifndef _AFR_SELFHEAL_H
+#define _AFR_SELFHEAL_H
+
+
+/* Perform fop on all UP subvolumes and wait for all callbacks to return.
+
+   The replies stored in frame->local are wiped first. The fop is then wound
+   to every child marked in priv->child_up[], with the child index passed as
+   the cookie, and syncbarrier_wait() is called on local->barrier with the
+   number of winds issued. rfn is the callback passed to STACK_WIND_COOKIE. */
+
+#define AFR_ONALL(frame, rfn, fop, args ...) do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ \
+ afr_replies_wipe (__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) continue; \
+ STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ __count++; \
+ } \
+ syncbarrier_wait (&__local->barrier, __count); \
+ } while (0)
+
+
+/* Perform fop on all subvolumes represented by list[] array and wait
+   for all callbacks to return.
+
+   The replies stored in frame->local are wiped first. The child index is
+   passed as the cookie of each wind. list is parenthesized in the
+   expansion so that any expression yielding the array can be passed. */
+
+#define AFR_ONLIST(list, frame, rfn, fop, args ...) do { \
+        afr_local_t *__local = frame->local; \
+        afr_private_t *__priv = frame->this->private; \
+        int __i = 0, __count = 0; \
+ \
+        afr_replies_wipe (__local, __priv); \
+ \
+        for (__i = 0; __i < __priv->child_count; __i++) { \
+                if (!(list)[__i]) continue; \
+                STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+                                   __priv->children[__i], \
+                                   __priv->children[__i]->fops->fop, args); \
+                __count++; \
+        } \
+        syncbarrier_wait (&__local->barrier, __count); \
+        } while (0)
+
+
+/* Perform fop on all UP subvolumes one at a time: after each wind,
+   syncbarrier_wait() is called for a single callback before the next
+   child is tried. The replies stored in frame->local are wiped first and
+   the child index is passed as the cookie. */
+
+#define AFR_SEQ(frame, rfn, fop, args ...) do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ \
+ afr_replies_wipe (__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) continue; \
+ STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ syncbarrier_wait (&__local->barrier, 1); \
+ } \
+ } while (0)
-#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
-#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
-#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
-#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
-#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
+/* Allocate an n x n matrix of `type` with alloca0(), one row at a time,
+   and yield it as a type **. Being alloca-based it must not be freed.
+   n is evaluated exactly once, so expressions such as `a + b` or calls
+   with side effects are safe to pass. */
+#define ALLOC_MATRIX(n, type) ({type **__ptr = NULL; \
+        int __i; \
+        int __mx_n = (n); \
+        __ptr = alloca0 (__mx_n * sizeof(type *)); \
+        for (__i = 0; __i < __mx_n; __i++) \
+                __ptr[__i] = alloca0 (__mx_n * sizeof(type)); \
+        __ptr;})
+
+
+/* True when the ia_<field> members of iatt structs f and s are bytewise
+   equal. f and s are parenthesized so that expressions such as `*ptr`
+   expand correctly. */
+#define IA_EQUAL(f,s,field) (memcmp (&((f).ia_##field), &((s).ia_##field), sizeof ((s).ia_##field)) == 0)
+
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal (xlator_t *this, uuid_t gfid);
+
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name);
+
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+
+int
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type);
-#endif /* __AFR_SELF_HEAL_H__ */
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies);
+
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on);
+
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks);
+
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
+
+int
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on);
+
+int
+afr_selfheal_recreate_entry (call_frame_t *frame, xlator_t *this, int dst,
+ int source, inode_t *dir, const char *name,
+ inode_t *inode, struct afr_reply *replies);
+
+int
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr);
+
+call_frame_t *
+afr_frame_create (xlator_t *this);
+
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid);
+
+#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index dd0dd86da..4bfe909bc 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1,525 +1,1256 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
+
#include "afr.h"
-#include "syncop.h"
+#include "afr-self-heal.h"
#include "afr-self-heald.h"
-#include "afr-self-heal-common.h"
+#include "protocol-common.h"
-static int
-_crawl_directory (loc_t *loc, pid_t pid, uuid_t gfid);
-static int
-get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
-{
- char *start = NULL;
- char *end = NULL;
- int ret = -1;
- int i = 0;
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
- if (!pathinfo)
- goto out;
- start = strchr (pathinfo, ':');
- if (!start)
- goto out;
- end = strrchr (pathinfo, ':');
- if (start == end)
- goto out;
+/* Records in healer->local whether healer->subvol is local, as reported by
+   afr_shd_is_subvol_local(). When it is not local the macro leaves the
+   current loop iteration: `break` if safe_break (healer) succeeds,
+   `continue` otherwise. It expands to a bare if/else containing
+   break/continue, so it can only be used as a statement directly inside
+   a loop body. */
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break (healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
- memset (hostname, 0, size);
- i = 0;
- while (++start != end)
- hostname[i++] = *start;
- ret = 0;
-out:
- return ret;
+
+/* Address of the n-th index / full healer in this xlator's private shd
+   state. The whole expansion is parenthesized so that uses such as
+   NTH_INDEX_HEALER (this, n)->field parse as intended, and the arguments
+   are parenthesized so that arbitrary expressions can be passed. */
+#define NTH_INDEX_HEALER(this, n) (&((((afr_private_t *)(this)->private))->shd.index_healers[(n)]))
+#define NTH_FULL_HEALER(this, n) (&((((afr_private_t *)(this)->private))->shd.full_healers[(n)]))
+
+int afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p);
+
+/*
+ * Return the name of child number @subvol, or NULL when @subvol is not
+ * a valid child index.
+ */
+char *
+afr_subvol_name (xlator_t *this, int subvol)
+{
+        afr_private_t *priv = NULL;
+
+        priv = this->private;
+        /* valid indices are 0 .. child_count - 1; child_count itself
+           is already one past the end of children[] */
+        if (subvol < 0 || subvol >= priv->child_count)
+                return NULL;
+
+        return priv->children[subvol]->name;
}
-int
-afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+
+/* Destructor hook for crawl event data; intentionally does nothing with
+   @data. */
+void
+afr_destroy_crawl_event_data (void *data)
{
- int ret = 0;
- char pathinfohost[1024] = {0};
- char localhost[1024] = {0};
- xlator_t *this = THIS;
+ return;
+}
- *local = _gf_false;
- ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
- pathinfo);
- goto out;
- }
- ret = gethostname (localhost, sizeof (localhost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
- "reason: %s", strerror (errno));
- goto out;
- }
+/* Destructor hook for a shd_event_t: frees the event's path string.
+   The shd_event_t itself is not freed here. A NULL @data is ignored. */
+void
+afr_destroy_shd_event_data (void *data)
+{
+ shd_event_t *shd_event = data;
- if (!strcmp (localhost, pathinfohost))
- *local = _gf_true;
-out:
- return ret;
+ if (!shd_event)
+ return;
+ GF_FREE (shd_event->path);
+
+ return;
}
-static void
-_generate_gfid_on_empty (uuid_t gfid)
+
+/*
+ * Tell whether child number @subvol is local, by fetching the
+ * GF_XATTR_PATHINFO_KEY xattr of the root inode from that child and
+ * handing it to afr_local_pathinfo().
+ *
+ * Returns _gf_false when the xattr cannot be fetched or read. The reply
+ * dict is released on every path.
+ */
+gf_boolean_t
+afr_shd_is_subvol_local (xlator_t *this, int subvol)
{
- if (uuid_is_null (gfid))
- uuid_generate (gfid);
+        char           *pathinfo = NULL;
+        afr_private_t  *priv = NULL;
+        dict_t         *xattr = NULL;
+        int             ret = 0;
+        gf_boolean_t    is_local = _gf_false;
+        loc_t           loc = {0, };
+
+        priv = this->private;
+
+        /* no reference is taken on the root inode, so loc is not wiped */
+        loc.inode = this->itable->root;
+        uuid_copy (loc.gfid, loc.inode->gfid);
+
+        ret = syncop_getxattr (priv->children[subvol], &loc, &xattr,
+                               GF_XATTR_PATHINFO_KEY);
+        if (ret || !xattr)
+                goto out;
+
+        /* pathinfo points into xattr; use it before the dict is released */
+        ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &pathinfo);
+        if (ret)
+                goto out;
+
+        afr_local_pathinfo (pathinfo, &is_local);
+
+        gf_log (this->name, GF_LOG_DEBUG, "subvol %s is %slocal",
+                priv->children[subvol]->name, is_local? "" : "not ");
+out:
+        if (xattr)
+                dict_unref (xattr);
+
+        return is_local;
}
-static void
-_empty_gfid_on_set (uuid_t gfid, int lookup_status, struct iatt *iatt)
+
+/*
+ * Wait on healer->cond until healer->rerun is set or a deadline of 60
+ * seconds from now expires, then return the value rerun had and reset
+ * it to 0.  While priv->shd.enabled is false the wait is repeated with
+ * a fresh deadline instead of returning.
+ *
+ * healer->mutex is passed to pthread_cond_timedwait(), so the caller
+ * must hold it (afr_shd_healer_wait() is the locking wrapper).
+ */
+int
+__afr_shd_healer_wait (struct subvol_healer *healer)
{
- if (lookup_status || !uuid_compare (gfid, iatt->ia_gfid))
- uuid_clear (gfid);
+ afr_private_t *priv = NULL;
+ struct timespec wait_till = {0, };
+ int ret = 0;
+
+ priv = healer->this->private;
+
+disabled_loop:
+ wait_till.tv_sec = time (NULL) + 60;
+
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait (&healer->cond,
+ &healer->mutex,
+ &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ /* consume the rerun request */
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!priv->shd.enabled)
+ goto disabled_loop;
+
+ return ret;
}
-static void
-_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+
+/* Locking wrapper: runs __afr_shd_healer_wait() with healer->mutex held
+   and returns its result. */
+int
+afr_shd_healer_wait (struct subvol_healer *healer)
{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ ret = __afr_shd_healer_wait (healer);
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
}
-static int
-_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- uuid_t gfid, off_t *offset, pid_t pid)
+
+/*
+ * Under healer->mutex: if a rerun has been requested, return _gf_false
+ * and leave the healer marked as running; otherwise clear
+ * healer->running and return _gf_true.
+ */
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
- int ret = 0;
- loc_t entry_loc = {0};
- dict_t *xattr_req = NULL;
+ gf_boolean_t ret = _gf_false;
- xattr_req = dict_new ();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- *offset = entry->d_off;
- if (IS_ENTRY_CWD (entry->d_name) ||
- IS_ENTRY_PARENT (entry->d_name))
- continue;
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
- ret = dict_reset (xattr_req);
- if (ret)
- goto out;
+ return ret;
+}
- loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc,
- parentloc, entry->d_name);
- if (ret)
- goto out;
- _generate_gfid_on_empty (gfid);
- ret = afr_set_dict_gfid (xattr_req, gfid);
- if (ret)
- goto out;
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path);
-
- ret = syncop_lookup (this, &entry_loc, xattr_req,
- &iatt, NULL, &parent);
- _empty_gfid_on_set (gfid, ret, &iatt);
- //Don't fail the crawl if lookup fails as it
- //could be because of split-brain
- if (ret || (!IA_ISDIR (iatt.ia_type)))
- continue;
- _fill_loc_info (&entry_loc, &iatt, &parent);
- ret = _crawl_directory (&entry_loc, pid, gfid);
- }
- ret = 0;
+/*
+ * Return the inode for @gfid: the one already present in this->itable
+ * if inode_find() has it, otherwise a new inode that is looked up on
+ * @subvol by GFID and linked into the table.
+ *
+ * Returns NULL when the inode cannot be allocated, the lookup fails, or
+ * the link fails.
+ */
+inode_t *
+afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+{
+ inode_t *inode = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+
+ inode = inode_find (this->itable, gfid);
+ if (inode)
+ goto out;
+
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode)
+ goto out;
+ uuid_copy (loc.gfid, gfid);
+
+ ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ /* no parent or name is given to inode_link() */
+ inode = inode_link (loc.inode, NULL, NULL, &iatt);
+ if (inode)
+ inode_lookup (inode);
out:
- if (xattr_req)
- dict_unref (xattr_req);
- if (entry_loc.path)
- loc_wipe (&entry_loc);
- return ret;
+ /* also reached with a still-zeroed loc when inode_find() succeeded */
+ loc_wipe (&loc);
+ return inode;
}
-static int
-_crawl_directory (loc_t *loc, pid_t pid, uuid_t gfid)
+
+/*
+ * Return an anonymous fd on the index directory of child number @child,
+ * or NULL on failure.
+ *
+ * The index directory GFID is read from the GF_XATTROP_INDEX_GFID xattr
+ * of the root inode on that child.  When the getxattr fails or returns
+ * no dict, errno is set to -ret before returning NULL.
+ */
+fd_t *
+afr_shd_index_opendir (xlator_t *this, int child)
{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- off_t offset = 0;
- gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
- int ret = 0;
- gf_boolean_t free_entries = _gf_false;
+        fd_t           *fd = NULL;
+        afr_private_t  *priv = NULL;
+        xlator_t       *subvol = NULL;
+        loc_t           rootloc = {0, };
+        inode_t        *inode = NULL;
+        int             ret = 0;
+        dict_t         *xattr = NULL;
+        void           *index_gfid = NULL;
+
+        priv = this->private;
+        subvol = priv->children[child];
+
+        rootloc.inode = inode_ref (this->itable->root);
+        uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+        ret = syncop_getxattr (subvol, &rootloc, &xattr,
+                               GF_XATTROP_INDEX_GFID);
+        if (ret || !xattr) {
+                errno = -ret;
+                goto out;
+        }
+
+        ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+        if (ret)
+                goto out;
+
+        gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
+                subvol->name, uuid_utoa (index_gfid));
+
+        inode = afr_shd_inode_find (this, subvol, index_gfid);
+        if (!inode)
+                goto out;
+        fd = fd_anonymous (inode);
+        /* The fd takes its own reference on the inode (see libglusterfs
+           fd.c), so drop the one returned by afr_shd_inode_find();
+           otherwise it is never released. */
+        inode_unref (inode);
+out:
+        loc_wipe (&rootloc);
+        if (xattr)
+                dict_unref (xattr);
+        return fd;
+}
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
- priv = this->private;
- GF_ASSERT (loc->inode);
+/* Unlink the entry @name under directory @inode on @subvol and return the
+   result of syncop_unlink(). */
+int
+afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {0, };
+ int ret = 0;
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- fd = fd_create (loc->inode, pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
+ loc.parent = inode_ref (inode);
+ loc.name = name;
- if (!loc->parent) {
- ret = syncop_lookup (this, loc, NULL,
- &iatt, NULL, &parent);
- }
+ ret = syncop_unlink (subvol, &loc);
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", loc->path);
- goto out;
- }
+ loc_wipe (&loc);
+ return ret;
+}
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
- ret = 0;
- free_entries = _gf_true;
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- ret = -1;
- goto out;
- }
- if (list_empty (&entries.list))
- goto out;
+/* Thin wrapper around afr_selfheal_name() for the entry @bname under the
+   directory with GFID @parent. @healer and @child are not used, and the
+   xlator is taken from THIS rather than from healer->this. */
+int
+afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,
+ const char *bname)
+{
+ int ret = -1;
- ret = _perform_self_heal (this, loc, &entries, gfid, &offset, pid);
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- }
- if (fd)
- fd_unref (fd);
- ret = 0;
-out:
- if (free_entries)
- gf_dirent_free (&entries);
- return ret;
+ ret = afr_selfheal_name (THIS, parent, bname);
+
+ return ret;
}
+/*
+ * Heal @gfid via afr_selfheal() and record the outcome for child number
+ * @child: -EIO counts as split-brain, any other negative value as a
+ * failed heal, 0 as healed.  For those outcomes an shd_event_t carrying
+ * the file's path is saved in the matching event history.
+ *
+ * Returns the result of afr_selfheal(); bookkeeping failures do not
+ * change it.
+ */
int
-afr_find_child_position (xlator_t *this, int child)
+afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid)
{
- afr_private_t *priv = NULL;
- dict_t *xattr_rsp = NULL;
- loc_t loc = {0};
- int ret = 0;
- gf_boolean_t local = _gf_false;
- char *pathinfo = NULL;
- afr_child_pos_t *pos = NULL;
- inode_table_t *itable = NULL;
+        int               ret = 0;
+        eh_t             *eh = NULL;
+        afr_private_t    *priv = NULL;
+        afr_self_heald_t *shd = NULL;
+        shd_event_t      *shd_event = NULL;
+        char             *path = NULL;
+        xlator_t         *subvol = NULL;
+        xlator_t         *this = NULL;
+        crawl_event_t    *crawl_event = NULL;
+
+        this = healer->this;
+        priv = this->private;
+        shd = &priv->shd;
+        crawl_event = &healer->crawl_event;
+
+        subvol = priv->children[child];
+
+        ret = afr_selfheal (this, gfid);
+
+        if (ret == -EIO) {
+                eh = shd->split_brain;
+                crawl_event->split_brain_count++;
+        } else if (ret < 0) {
+                eh = shd->heal_failed;
+                crawl_event->heal_failed_count++;
+        } else if (ret == 0) {
+                eh = shd->healed;
+                crawl_event->healed_count++;
+        }
+
+        afr_shd_gfid_to_path (this, subvol, gfid, &path);
+        if (!path)
+                return ret;
+
+        if (!eh) {
+                /* ret > 0: no history list was selected, so nothing
+                   would take ownership of path; free it here */
+                GF_FREE (path);
+                return ret;
+        }
+
+        shd_event = GF_CALLOC (1, sizeof(*shd_event),
+                               gf_afr_mt_shd_event_t);
+        if (!shd_event) {
+                GF_FREE (path);
+                return ret;
+        }
+
+        shd_event->child = child;
+        shd_event->path = path;
+
+        /* on success the history owns shd_event and its path */
+        if (eh_save_history (eh, shd_event) < 0) {
+                GF_FREE (shd_event);
+                GF_FREE (path);
+        }
+
+        return ret;
+}
- priv = this->private;
- pos = &priv->shd.pos[child];
- if (*pos != AFR_POS_UNKNOWN) {
- goto out;
- }
+/* Reset the healer's crawl event before a sweep: zero the healed,
+   split-brain and heal-failed counters, stamp start_time with the current
+   time and clear end_time. */
+void
+afr_shd_sweep_prepare (struct subvol_healer *healer)
+{
+ crawl_event_t *event = NULL;
- //TODO: Hack to make the root_loc hack work
- LOCK (&priv->lock);
- {
- if (!priv->root_inode) {
- itable = inode_table_new (0, this);
- if (!itable)
- goto unlock;
- priv->root_inode = inode_new (itable);
- if (!priv->root_inode)
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&priv->lock);
+ event = &healer->crawl_event;
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
- afr_build_root_loc (priv->root_inode, &loc);
+ event->healed_count = 0;
+ event->split_brain_count = 0;
+ event->heal_failed_count = 0;
- ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
- GF_XATTR_PATHINFO_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child "
- "%d", child);
- goto out;
- }
+ time (&event->start_time);
+ event->end_time = 0;
+}
- ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on "
- "child %d", child);
- goto out;
- }
- ret = afr_local_pathinfo (pathinfo, &local);
- if (ret)
- goto out;
- if (local)
- *pos = AFR_POS_LOCAL;
- else
- *pos = AFR_POS_REMOTE;
+/* Finish a sweep: stamp end_time on the healer's crawl event, save a copy
+   of the event in the statistics history of healer->subvol, and clear
+   start_time on the live event. The copy is freed here if it cannot be
+   saved; nothing is saved if the copy cannot be allocated. */
+void
+afr_shd_sweep_done (struct subvol_healer *healer)
+{
+ crawl_event_t *event = NULL;
+ crawl_event_t *history = NULL;
+ afr_self_heald_t *shd = NULL;
- gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos);
-out:
- return ret;
+ event = &healer->crawl_event;
+ shd = &(((afr_private_t *)healer->this->private)->shd);
+
+ time (&event->end_time);
+ /* snapshot taken before start_time is cleared below */
+ history = memdup (event, sizeof (*event));
+ event->start_time = 0;
+
+ if (!history)
+ return;
+
+ if (eh_save_history (shd->statistics[healer->subvol], history) < 0)
+ GF_FREE (history);
}
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+
+/*
+ * Walk the index directory of the healer's subvolume and self-heal every
+ * entry whose name parses as a GFID.  Index entries whose heal returns
+ * -ENOENT or -ESTALE are purged from the index.
+ *
+ * Returns -errno if the index directory cannot be opened, -EBUSY if the
+ * self-heal daemon gets disabled mid-sweep, another non-zero value if a
+ * readdir fails or a batch ends on a failed heal, and otherwise the
+ * number of entries healed successfully.
+ */
+int
+afr_shd_index_sweep (struct subvol_healer *healer)
{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
- return 0;
+        xlator_t       *this = NULL;
+        int             child = -1;
+        fd_t           *fd = NULL;
+        xlator_t       *subvol = NULL;
+        afr_private_t  *priv = NULL;
+        off_t           offset = 0;
+        gf_dirent_t     entries;
+        gf_dirent_t    *entry = NULL;
+        uuid_t          gfid;
+        int             ret = 0;
+        int             count = 0;
+
+        this = healer->this;
+        child = healer->subvol;
+        priv = this->private;
+        subvol = priv->children[child];
+
+        fd = afr_shd_index_opendir (this, child);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "unable to opendir index-dir on %s", subvol->name);
+                return -errno;
+        }
+
+        INIT_LIST_HEAD (&entries.list);
+
+        while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+                if (ret > 0)
+                        ret = 0;
+                list_for_each_entry (entry, &entries.list, list) {
+                        offset = entry->d_off;
+
+                        if (!priv->shd.enabled) {
+                                ret = -EBUSY;
+                                break;
+                        }
+
+                        if (!strcmp (entry->d_name, ".") ||
+                            !strcmp (entry->d_name, ".."))
+                                continue;
+
+                        gf_log (this->name, GF_LOG_DEBUG, "got entry: %s",
+                                entry->d_name);
+
+                        if (uuid_parse (entry->d_name, gfid)) {
+                                /* not a GFID-named entry: skip it, and do
+                                   not let the parse failure leak into ret,
+                                   where it would abort the whole sweep if
+                                   this were the last entry of the batch */
+                                ret = 0;
+                                continue;
+                        }
+
+                        ret = afr_shd_selfheal (healer, child, gfid);
+                        if (ret == 0)
+                                count++;
+
+                        if (ret == -ENOENT || ret == -ESTALE) {
+                                afr_shd_index_purge (subvol, fd->inode,
+                                                     entry->d_name);
+                                ret = 0;
+                        }
+                }
+
+                gf_dirent_free (&entries);
+                if (ret)
+                        break;
+        }
+
+        if (fd)
+                fd_unref (fd);
+        if (!ret)
+                ret = count;
+        return ret;
}
-static int
-afr_find_all_children_postions (xlator_t *this)
+
+/*
+ * Recursively crawl the directory `inode` on the healer's subvolume.
+ * For every entry other than "." and ".." a name heal and a gfid heal
+ * are attempted (their results are ignored), and directories are
+ * descended into.
+ *
+ * Returns 0 when the crawl ran to the end, or a non-zero value when
+ * readdirp, inode linking or a nested crawl failed, or when the daemon
+ * was disabled (-EBUSY).
+ */
+int
+afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
{
- int ret = -1;
- int i = 0;
- gf_boolean_t succeeded = _gf_false;
- afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ xlator_t *this = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+
+ this = healer->this;
+ priv = this->private;
+ subvol = priv->children[healer->subvol];
+
+ fd = fd_anonymous (inode);
+ if (!fd)
+ return -errno;
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (subvol, fd, 131072, offset, 0, &entries))) {
+ if (ret < 0)
+ break;
+
+ ret = gf_link_inodes_from_dirent (this, fd->inode, &entries);
+ if (ret) {
+ /* the batch was already read; release it
+ before leaving the loop */
+ gf_dirent_free (&entries);
+ break;
+ }
+
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!priv->shd.enabled) {
+ ret = -EBUSY;
+ break;
+ }
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ afr_shd_selfheal_name (healer, healer->subvol,
+ inode->gfid, entry->d_name);
+
+ afr_shd_selfheal (healer, healer->subvol,
+ entry->d_stat.ia_gfid);
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ ret = afr_shd_full_sweep (healer, entry->inode);
+ if (ret)
+ break;
+ }
+ }
+
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+}
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] != 1)
- continue;
- ret = afr_find_child_position (this, i);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to determine if the "
- "child %s is local.",
- priv->children[i]->name);
- continue;
- }
- succeeded = _gf_true;
- }
- if (succeeded)
- ret = 0;
- return ret;
+
+/*
+ * Thread function of an index healer. `data` is the struct
+ * subvol_healer the thread serves. The outer loop never exits: after
+ * each afr_shd_healer_wait() it runs index sweeps back to back for as
+ * long as a sweep reports at least one healed entry (ret > 0).
+ */
+void *
+afr_shd_index_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ afr_shd_healer_wait (healer);
+
+ ASSERT_LOCAL(this, healer);
+
+ do {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "starting index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ ret = afr_shd_index_sweep (healer);
+
+ afr_shd_sweep_done (healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finished index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep (1);
+ } while (ret > 0);
+ }
+
+ return NULL;
}
-static gf_boolean_t
-afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count)
+
+/*
+ * Thread function of a full healer. `data` is the struct subvol_healer
+ * the thread serves. Each iteration waits under healer->mutex; when
+ * __afr_shd_healer_wait() returns 0 the healer is marked not running
+ * and the thread exits, otherwise one full sweep from the root of
+ * this->itable is performed.
+ */
+void *
+afr_shd_full_healer (void *data)
{
- int i = 0;
- gf_boolean_t local = _gf_false;
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
- for (i = 0; i < child_count; i++, pos++) {
- if (*pos == AFR_POS_LOCAL) {
- local = _gf_true;
- break;
- }
- }
- return local;
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ pthread_mutex_lock (&healer->mutex);
+ {
+ run = __afr_shd_healer_wait (healer);
+ /* running is cleared under the same lock that
+ afr_shd_healer_spawn() takes before testing it */
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ if (!run)
+ break;
+
+ ASSERT_LOCAL(this, healer);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ /* the sweep result is not checked here */
+ afr_shd_full_sweep (healer, this->itable->root);
+
+ afr_shd_sweep_done (healer);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ }
+
+ return NULL;
}
+
+/*
+ * Initialise the synchronisation objects and flags of one healer.
+ * Returns 0 on success or the non-zero pthread error code of the
+ * initialisation that failed; on failure no initialised mutex is left
+ * behind.
+ */
int
-afr_init_child_position (xlator_t *this, int child)
+afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
{
- int ret = 0;
+ int ret = 0;
- if (child == AFR_ALL_CHILDREN) {
- ret = afr_find_all_children_postions (this);
- } else {
- ret = afr_find_child_position (this, child);
- }
- return ret;
+ ret = pthread_mutex_init (&healer->mutex, NULL);
+ if (ret)
+ goto out;
+
+ ret = pthread_cond_init (&healer->cond, NULL);
+ if (ret) {
+ /* do not leak the mutex initialised just above */
+ pthread_mutex_destroy (&healer->mutex);
+ goto out;
+ }
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
+out:
+ return ret;
}
+
+/*
+ * Start or wake the thread of `healer`. If the healer is already
+ * marked running its condition variable is signalled; otherwise a
+ * thread running `threadfn` with the healer as argument is created.
+ * In both successful cases healer->rerun is set. Returns 0, or the
+ * non-zero result of gf_thread_create() (rerun is then left untouched).
+ */
int
-afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
+afr_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
{
- gf_boolean_t local = _gf_false;
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal (&healer->cond);
+ } else {
+ ret = gf_thread_create (&healer->thread, NULL,
+ threadfn, healer);
+ if (ret)
+ goto unlock;
+ healer->running = 1;
+ }
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
- if (child == AFR_ALL_CHILDREN)
- local = afr_local_child_exists (shd->pos, child_count);
- else
- local = (shd->pos[child] == AFR_POS_LOCAL);
+ return ret;
+}
- return local;
+
+/* Start or wake the full healer of subvolume `subvol`; returns the
+ result of afr_shd_healer_spawn(). */
+int
+afr_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
+ afr_shd_full_healer);
}
-static int
-afr_crawl_directory (xlator_t *this, pid_t pid)
+
+/* Start or wake the index healer of subvolume `subvol`; returns the
+ result of afr_shd_healer_spawn(). */
+int
+afr_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
+ afr_shd_index_healer);
+}
+
+
+/*
+ * Append one crawl event to `output`. The xlator id is read from
+ * output[this->name]; the running index of the event is read from
+ * "statistics-<xl_id>-<child>-count" and written back incremented by
+ * one after all per-event keys have been set.
+ *
+ * An event whose start_time is 0 is skipped and 0 is returned.
+ * Otherwise returns 0 on success or the non-zero result of the dict
+ * call that failed.
+ */
+int
+afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- loc_t loc = {0};
- gf_boolean_t crawl = _gf_false;
int ret = 0;
- uuid_t gfid = {0};
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ /* start_time == 0: nothing to report for this event */
+ if (!crawl_event->start_time)
+ goto out;
+
+ start_time_str = gf_strdup (ctime (&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup (ctime (&crawl_event->end_time));
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
- priv = this->private;
- shd = &priv->shd;
+ /* the result of this lookup is not checked: when the key is
+ missing count keeps its initial value 0 */
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
- LOCK (&priv->lock);
- {
- if (shd->inprogress) {
- shd->pending = _gf_true;
- } else {
- shd->inprogress = _gf_true;
- crawl = _gf_true;
- }
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_healed_count to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_split_brain_count to outout");
+ goto out;
}
- UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_str (output, key, crawl_type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_type to output");
goto out;
}
- if (!crawl)
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_healed_failed_count to outout");
goto out;
+ }
- afr_build_root_loc (priv->root_inode, &loc);
- while (crawl) {
- ret = _crawl_directory (&loc, pid, gfid);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed");
- else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed");
- LOCK (&priv->lock);
- {
- if (shd->pending) {
- shd->pending = _gf_false;
- } else {
- shd->inprogress = _gf_false;
- crawl = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, start_time_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_start_time to outout");
+ goto out;
+ } else {
+ /* stored in the dict: keep the GF_FREE at out: away
+ from it */
+ start_time_str = NULL;
+ }
+
+ /* no end time recorded means the crawl is still in progress */
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ if (!end_time_str)
+ end_time_str = gf_strdup ("Could not determine the end time");
+ ret = dict_set_dynstr (output, key, end_time_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_end_time to outout");
+ goto out;
+ } else {
+ end_time_str = NULL;
+ }
+
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_inprogress to outout");
+ goto out;
}
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not increment the counter.");
+ goto out;
+ }
out:
+ GF_FREE (start_time_str);
+ GF_FREE (end_time_str);
return ret;
}
-static int
-afr_crawl (void *data)
+
+/*
+ * Append `path` to the list kept in `output` for (xlator id, child).
+ * The running index is read from "<xl_id>-<child>-count"; the path is
+ * stored under "<xl_id>-<child>-<index>", tv->tv_sec (when `tv` is
+ * given) under "<xl_id>-<child>-<index>-time", and the count key is
+ * then written back incremented by one.
+ *
+ * `path` is handed to dict_set_dynstr().
+ * NOTE(review): presumably the dict then owns it - confirm.
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * dict call.
+ */
+int
+afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv)
{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- afr_crawl_data_t *crawl_data = data;
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
- ret = afr_init_child_position (this, crawl_data->child);
- if (ret)
+ /* the result of this lookup is not checked: when the key is
+ missing count keeps its initial value 0 */
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ ret = dict_set_dynstr (output, key, path);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ path);
goto out;
+ }
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
+ if (tv) {
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+ child, count);
+ ret = dict_set_uint32 (output, key, tv->tv_sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
+ path);
+ goto out;
+ }
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
goto out;
+ }
- ret = afr_crawl_directory (this, crawl_data->pid);
+ ret = 0;
out:
return ret;
}
-void
-afr_proactive_self_heal (xlator_t *this, int idx)
+
+/*
+ * Resolve `gfid` to a path by reading the GFID_TO_PATH_KEY xattr from
+ * `subvol`. On success *path_p receives a gf_strdup()ed copy that the
+ * caller must free.
+ *
+ * Returns 0 on success, the non-zero result of syncop_getxattr() when
+ * the lookup fails, -EINVAL when the reply carries no path and -ENOMEM
+ * when the copy cannot be allocated. The reply dict is released on
+ * every path.
+ */
+int
+afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- call_frame_t *frame = NULL;
- afr_crawl_data_t *crawl_data = NULL;
- int ret = 0;
+ loc_t loc = {0,};
+ char *path = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ uuid_copy (loc.gfid, gfid);
+ loc.inode = inode_new (this->itable);
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, GFID_TO_PATH_KEY);
+ loc_wipe (&loc);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret || !path) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* path points into xattr, so it is copied before the dict
+ is released below */
+ *path_p = gf_strdup (path);
+ if (!*path_p)
+ ret = -ENOMEM;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
- priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled)
- goto out;
- if ((idx != AFR_ALL_CHILDREN) &&
- (shd->pos[idx] == AFR_POS_REMOTE))
- goto out;
+/*
+ * List the index directory of subvolume `child` into `output`: every
+ * entry whose name parses as a gfid is resolved to a path and added
+ * with afr_shd_dict_add_path(). Entries whose lookup returns -ENOENT
+ * or -ESTALE are purged from the index instead.
+ *
+ * Returns a negative value when the index directory cannot be opened,
+ * or the non-zero result that ended a batch. `count` is never
+ * incremented in this function, so a successful run returns 0.
+ */
+int
+afr_shd_gather_index_entries (xlator_t *this, int child, dict_t *output)
+{
+ fd_t *fd = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ uuid_t gfid;
+ int ret = 0;
+ int count = 0;
+ char *path = NULL;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ fd = afr_shd_index_opendir (this, child);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unable to opendir index-dir on %s", subvol->name);
+ return -errno;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+ /* a positive return only means "entries were read" */
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG, "got entry: %s",
+ entry->d_name);
+
+ /* A name that is not a gfid is skipped without
+ touching ret; storing the parse failure in ret
+ would stop the listing if this happened to be
+ the last entry of the batch. */
+ if (uuid_parse (entry->d_name, gfid))
+ continue;
+
+ path = NULL;
+ ret = afr_shd_gfid_to_path (this, subvol, gfid, &path);
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ afr_shd_index_purge (subvol, fd->inode,
+ entry->d_name);
+ ret = 0;
+ continue;
+ }
+
+ ret = afr_shd_dict_add_path (this, output, child, path,
+ NULL);
+ }
+
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
+
+ if (fd)
+ fd_unref (fd);
+ if (!ret)
+ ret = count;
+ return ret;
+}
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
- afr_set_lk_owner (frame, this);
- afr_set_low_priority (frame);
- crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_afr_crawl_data_t);
- if (!crawl_data)
- goto out;
- crawl_data->child = idx;
- crawl_data->pid = frame->root->pid;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx);
- ret = synctask_new (this->ctx->env, afr_crawl,
- afr_crawl_done, frame, crawl_data);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create the "
- "task for %d ret %d", idx, ret);
-out:
- return;
+/*
+ * Callback handed to eh_dump(): adds the path of one recorded
+ * shd_event_t (cb->data) to the dict passed as `data`, together with
+ * the timestamp cb->tv. Events of a child whose index healer is not
+ * flagged local are skipped. Returns 0, or -ENOMEM when the path
+ * cannot be duplicated; the result of afr_shd_dict_add_path() is not
+ * checked.
+ */
+int
+afr_add_shd_event (circular_buffer_t *cb, void *data)
+{
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ shd_event = cb->data;
+
+ if (!shd->index_healers[shd_event->child].local)
+ return 0;
+
+ /* a private copy is passed on, so the buffered event keeps
+ its own string */
+ path = gf_strdup (shd_event->path);
+ if (!path)
+ return -ENOMEM;
+
+ afr_shd_dict_add_path (this, output, shd_event->child, path,
+ &cb->tv);
+ return 0;
}
-//TODO: This is a hack
-void
-afr_build_root_loc (inode_t *inode, loc_t *loc)
+/*
+ * Callback handed to eh_dump(): adds one recorded crawl_event_t
+ * (cb->data) to the dict passed as `data`. Events of a child whose
+ * index healer is not flagged local are skipped. Always returns 0;
+ * the result of afr_shd_dict_add_crawl_event() is not checked.
+ */
+int
+afr_add_crawl_event (circular_buffer_t *cb, void *data)
{
- loc->path = "/";
- loc->name = "";
- loc->inode = inode;
- loc->ino = 1;
- loc->inode->ino = 1;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ crawl_event_t *crawl_event = NULL;
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = cb->data;
+
+ if (!shd->index_healers[crawl_event->child].local)
+ return 0;
+
+ afr_shd_dict_add_crawl_event (this, output, crawl_event);
+
+ return 0;
}
+
+/*
+ * Set up the self-heal daemon state of `this`: the inode table, one
+ * index healer and one full healer per child, the healed /
+ * heal-failed / split-brain event histories and one crawl statistics
+ * history per child.
+ *
+ * Returns 0 only when every step succeeded. A failing
+ * afr_shd_healer_init() returns its non-zero code; any failing
+ * allocation or eh_new() returns -1. Nothing already set up is torn
+ * down on failure.
+ */
int
-afr_set_root_gfid (dict_t *dict)
+afr_selfheal_daemon_init (xlator_t *this)
{
- uuid_t gfid;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+
+ shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ /* the loop above leaves ret at 0; re-arm the failure value
+ so the bare "goto out" checks below report an error */
+ ret = -1;
+
+ shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ /* same as above: every remaining step fails with -1 */
+ ret = -1;
+
+ shd->healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->healed)
+ goto out;
+
+ shd->heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->heal_failed)
+ goto out;
+
+ shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
- memset (gfid, 0, 16);
- gfid[15] = 1;
+ ret = 0;
+out:
+ return ret;
+}
- ret = afr_set_dict_gfid (dict, gfid);
- return ret;
+/* Start or wake the index healer of `subvol`. Always returns 0; the
+ spawn result is not checked. */
+int
+afr_selfheal_childup (xlator_t *this, int subvol)
+{
+ afr_shd_index_healer_spawn (this, subvol);
+
+ return 0;
}
+
+/*
+ * Read the GF_XATTROP_INDEX_COUNT xattr from the root of child `i`.
+ * Returns the count, or -1 when the getxattr fails or the reply does
+ * not carry the key. The reply dict is released on every path.
+ */
+int64_t
+afr_shd_get_index_count (xlator_t *this, int i)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t count = 0;
+ loc_t rootloc = {0, };
+ dict_t *xattr = NULL;
+ int ret = -1;
+ int64_t result = -1;
+
+ priv = this->private;
+ subvol = priv->children[i];
+
+ rootloc.inode = inode_ref (this->itable->root);
+ uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr (subvol, &rootloc, &xattr,
+ GF_XATTROP_INDEX_COUNT);
+ loc_wipe (&rootloc);
+
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_uint64 (xattr, GF_XATTROP_INDEX_COUNT, &count);
+ if (ret)
+ goto out;
+
+ result = count;
+out:
+ /* drop the reply dict; returning without this leaked it on
+ every call */
+ if (xattr)
+ dict_unref (xattr);
+ return result;
+}
+
+
+/*
+ * Entry point for self-heal daemon operations. The operation is read
+ * from input["xl-op"] and the xlator id from input[this->name];
+ * results are written to `output`. The id is also stored in
+ * output[this->name] while the operation runs and deleted again
+ * before returning.
+ *
+ * The return value is op_ret, not ret. op_ret starts at 0, is set to
+ * -1 by the heal and heal-count operations, and is set back to 0 once
+ * at least one child was handled.
+ * NOTE(review): the early "goto out" exits (a child_up entry of -1,
+ * or a failed dict call) and the unknown-op branch leave op_ret at 0,
+ * so they return 0 as well - confirm that this is intended.
+ */
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int op_ret = 0;
+ int64_t cnt = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ /* NOTE(review): -1 presumably marks a child whose state is not
+ known yet - confirm */
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == -1)
+ goto out;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ op_ret = -1;
+
+ /* one "<xl_id>-<child>-status" string per child; the
+ healer is only spawned for a connected, local child
+ while at least two children are up */
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_index_healer_spawn (this, i);
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ op_ret = -1;
+
+ /* same checks as the index heal above, for the full
+ healers */
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_full_healer_spawn (this, i);
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ for (i = 0; i < priv->child_count; i++)
+ if (shd->index_healers[i].local)
+ afr_shd_gather_index_entries (this, i, output);
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ eh_dump (shd->healed, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ eh_dump (shd->heal_failed, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ eh_dump (shd->split_brain, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_STATISTICS:
+ /* saved history first, then the live event of each
+ healer */
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump (shd->statistics[i], output,
+ afr_add_crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->index_healers[i].crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->full_healers[i].crawl_event);
+ }
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else {
+ snprintf (key, 64, "%d-%d-hardlinks", xl_id, i);
+ cnt = afr_shd_get_index_count (this, i);
+ /* a negative count means the lookup failed;
+ no key is written for that child */
+ if (cnt >= 0) {
+ ret = dict_set_uint64 (output, key, cnt);
+ }
+ op_ret = 0;
+ }
+ }
+
+// ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+// STATISTICS_TO_BE_HEALED,
+// output);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return op_ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 5d7892fa7..10e229ee7 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -1,40 +1,72 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEALD_H__
-#define __AFR_SELF_HEALD_H__
-#include "xlator.h"
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-#define AFR_ALL_CHILDREN -1
+#ifndef _AFR_SELF_HEALD_H
+#define _AFR_SELF_HEALD_H
+
+#include <pthread.h>
+
+
+/* One entry of the healed / heal-failed / split-brain histories. */
+typedef struct {
+ int child; /* index of the child the event belongs to */
+ char *path; /* path reported for the event */
+} shd_event_t;
+
+/* Counters and timestamps of one crawl over a child. */
+typedef struct {
+ int child;
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+
+ /* If start_time is 0, it means crawler is not in progress
+ and stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means
+ crawler is in progress */
+ time_t end_time;
+ char *crawl_type;
+} crawl_event_t;
+
+/* State of one healer thread; one index healer and one full healer
+ are kept per child. */
+struct subvol_healer {
+ xlator_t *this;
+ int subvol; /* index of the child this healer serves */
+ gf_boolean_t local;
+ gf_boolean_t running; /* set while a healer thread exists */
+ gf_boolean_t rerun; /* set whenever the healer is spawned or woken */
+ crawl_event_t crawl_event; /* statistics of the current crawl */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+};
+
+/* Self-heal daemon state kept in the xlator's private data. */
+typedef struct {
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+ struct subvol_healer *index_healers; /* one per child */
+ struct subvol_healer *full_healers; /* one per child */
+
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
+ eh_t **statistics; /* one crawl history per child */
+} afr_self_heald_t;
-typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
-} afr_crawl_data_t;
-void afr_proactive_self_heal (xlator_t *this, int idx);
+int
+afr_selfheal_childup (xlator_t *this, int subvol);
-void afr_build_root_loc (inode_t *inode, loc_t *loc);
+int
+afr_selfheal_daemon_init (xlator_t *this);
-int afr_set_root_gfid (dict_t *dict);
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
-#endif /* __AFR_SELF_HEALD_H__ */
+#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d3960dcff..205ff759e 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,186 +1,173 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
#include <signal.h>
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this);
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
- of RENAME */
-#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- ret = fd_ctx_get (fd, this, &ctx);
+/*
+ * Wind the transaction's fop to every child flagged in
+ * local->transaction.pre_op. When no child is flagged the transaction
+ * is resumed directly. Always returns 0.
+ */
+int
+__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- if (ret < 0)
- goto out;
+ local = frame->local;
+ priv = this->private;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
-out:
- return fd_ctx;
-}
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+ local->call_count = call_count;
-static void
-afr_pid_save (call_frame_t *frame)
-{
- afr_local_t * local = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ local->transaction.wind (frame, this, i);
- local = frame->local;
+ /* stop once every expected wind was issued */
+ if (!--call_count)
+ break;
+ }
+ }
- local->saved_pid = frame->root->pid;
+ return 0;
}
-static void
-afr_pid_restore (call_frame_t *frame)
+/* Call the transaction's unwind handler and then destroy the
+ transaction frame. Always returns 0. */
+int
+__afr_txn_write_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
-}
-
+ local->transaction.unwind (frame, this);
-static void
-__mark_all_pending (int32_t *pending[], int child_count,
- afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
+ AFR_STACK_DESTROY (frame);
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
- }
+ return 0;
}
-static void
-__mark_child_dead (int32_t *pending[], int child_count, int child,
- afr_transaction_type type)
+/*
+ * Take local->transaction.main_frame out of the transaction under
+ * frame->lock and return it; the field is left NULL, so a second call
+ * returns NULL.
+ */
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
{
- int j = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
- j = afr_index_for_transaction_type (type);
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- pending[child][j] = 0;
+ return fop_frame;
}
+/* Remember the frame's current lk_owner in local->saved_lk_owner. */
static void
-__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_save_lk_owner (call_frame_t *frame)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t * local = NULL;
local = frame->local;
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]++;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
+ local->saved_lk_owner = frame->root->lk_owner;
}
+/* Put the lk_owner stored in local->saved_lk_owner back on the
+ frame. */
static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_restore_lk_owner (call_frame_t *frame)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t * local = NULL;
local = frame->local;
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
-static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+/* Clear local->transaction.failed_subvols[] for every child. */
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ local = frame->local;
+ priv = this->private;
- if (!child_up[i])
- pending[i][j] = 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
}
-static void
-__mark_all_success (int32_t *pending[], int child_count,
- afr_transaction_type type)
+/*
+ * Run the transaction's fop. The frame's own lk_owner is saved and
+ * replaced by the lk_owner of the main frame, the pre-op state is
+ * updated when local->pre_op_compat is set, and a delayed changelog
+ * on local->fd (if there is one) is woken up before the fop is
+ * called. Always returns 0.
+ */
+int
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
+ fd_t *fd = NULL;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
- }
+ local = frame->local;
+ fd = local->fd;
+
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update (frame, this);
+
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ local->transaction.fop (frame, this);
+
+ return 0;
}
@@ -247,63 +234,30 @@ __fop_changelog_needed (call_frame_t *frame, xlator_t *this)
}
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending)
{
int i = 0;
int ret = 0;
+ int pending_zero[AFR_NUM_CHANGE_LOGS] = {0, };
for (i = 0; i < priv->child_count; i++) {
+ if (!memcmp (pending_zero, pending[i], sizeof (pending_zero)))
+ /* don't set xattrs for non-pending servers */
+ continue;
+
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int));
/* 3 = data+metadata+entry */
- if (ret < 0)
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-static int
-afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- afr_transaction_type type)
-{
- int i = 0;
- int ret = 0;
- int *arr = NULL;
- int index = 0;
- size_t pending_xattr_size = 3 * sizeof (int32_t);
- /* 3 = data+metadata+entry */
-
- index = afr_index_for_transaction_type (type);
-
- for (i = 0; i < priv->child_count; i++) {
- arr = GF_CALLOC (1, pending_xattr_size,
- gf_afr_mt_char);
- if (!arr) {
- ret = -1;
- goto out;
- }
-
- memcpy (arr, pending[i], pending_xattr_size);
-
- arr[index] = hton32 (ntoh32(arr[index]) + 1);
-
- ret = dict_set_bin (xattr, priv->pending_key[i],
- arr, pending_xattr_size);
-
- if (ret < 0)
- goto out;
+ if (ret)
+ break;
}
-out:
return ret;
}
-
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
@@ -329,410 +283,532 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
/* {{{ pending */
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+
+int
+afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int call_count = -1;
- priv = this->private;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
int_lock = &local->internal_lock;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
- }
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
- return 0;
+ return 0;
}
-void
-afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
- inode_t *inode, afr_transaction_type type)
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
{
- int i = -1;
- int count = 0;
- int read_child = -1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int **pending = NULL;
- int idx = 0;
- int32_t *stale_children = NULL;
- int32_t *fresh_children = NULL;
- gf_boolean_t rm_stale_children = _gf_false;
-
- idx = afr_index_for_transaction_type (type);
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- pending = local->pending;
-
- stale_children = afr_children_create (priv->child_count);
- if (!stale_children)
- goto out;
-
- fresh_children = local->fresh_children;
- read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
-
- GF_ASSERT (read_child >= 0);
-
- if (pending[read_child][idx] == 0)
- read_child = -1;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_is_child_present (fresh_children,
- priv->child_count, i))
- continue;
- if (pending[i][idx] == 0) {
- /* child is down or op failed on it */
- rm_stale_children = _gf_true;
- afr_children_rm_child (fresh_children, i,
- priv->child_count);
- stale_children[count++] = i;
- }
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
}
-
- if (!rm_stale_children) {
- GF_ASSERT (read_child >= 0);
- goto out;
- }
-
- if (fresh_children[0] == -1) {
- //All children failed. leave as-is
- goto out;
- }
-
- if (read_child == -1)
- read_child = fresh_children[0];
- afr_inode_rm_stale_children (this, inode, read_child, stale_children);
-out:
- if (stale_children)
- GF_FREE (stale_children);
- return;
+ return NULL;
}
unsigned char*
afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
{
unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
switch (type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
break;
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- locked_nodes = int_lock->entry_locked_nodes;
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
break;
}
return locked_nodes;
}
+
int
-afr_changelog_pre_op_call_count (afr_transaction_type type,
- afr_internal_lock_t *int_lock,
- unsigned int child_count)
+afr_changelog_call_count (afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned int child_count)
{
- int call_count = 0;
- unsigned char *locked_nodes = NULL;
+ int call_count = 0;
- locked_nodes = afr_locked_nodes_get (type, int_lock);
- GF_ASSERT (locked_nodes);
+ call_count = AFR_COUNT(pre_op_subvols, child_count);
- call_count = afr_locked_children_count (locked_nodes, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION) {
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
call_count *= 2;
- }
return call_count;
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = this->private;
- afr_internal_lock_t *int_lock = NULL;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- dict_t **xattr = NULL;
- int piggyback = 0;
- int index = 0;
- int nothing_failed = 1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- if (local->fd)
- afr_transaction_rm_stale_children (frame, this,
- local->fd->inode,
- local->transaction.type);
+ local = frame->local;
+ priv = this->private;
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
+ if (local->transaction.failed_subvols[i])
+ return _gf_false;
}
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- local->call_count = call_count;
+ return _gf_true;
+}
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
- if (call_count == 0) {
- /* no child is up */
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- goto out;
- }
+/*
+ * Detect a "failed everywhere, in the same way" outcome of a fop.
+ *
+ * Every subvolume with a valid reply is inspected.  The failure marks of
+ * the transaction are cleared through __mark_all_success() only when each
+ * valid reply failed (op_ret == -1) and all of them carry one and the same
+ * op_errno.  Nothing is changed as soon as:
+ *
+ *   - a valid reply succeeded (the fop did not fail on every subvolume),
+ *   - a valid reply carries ENOTCONN (it is not known whether the backend
+ *     performed the operation, so the error is not symmetric), or
+ *   - two valid replies carry different op_errno values.
+ *
+ * When no reply is valid at all, the failure marks are cleared as well.
+ */
+void
+afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv        = this->private;
+        afr_local_t   *local       = frame->local;
+        int            first_errno = 0;
+        int            cur_errno   = 0;
+        int            idx         = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (!local->replies[idx].valid)
+                        continue;
+
+                /* succeeded on at least one subvolume */
+                if (local->replies[idx].op_ret != -1)
+                        return;
+
+                cur_errno = local->replies[idx].op_errno;
+
+                /* ENOTCONN is never treated as a symmetric error */
+                if (cur_errno == ENOTCONN)
+                        return;
+
+                if (!first_errno)
+                        first_errno = cur_errno;
+                else if (first_errno != cur_errno)
+                        /* mismatching op_errno values */
+                        return;
+        }
+
+        __mark_all_success (frame, this);
+}
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
- }
- index = afr_index_for_transaction_type (local->transaction.type);
- if (local->optimistic_change_log &&
- local->transaction.type != AFR_DATA_TRANSACTION) {
- /* if nothing_failed, then local->pending[..] == {0 .. 0} */
- for (i = 0; i < priv->child_count; i++)
- local->pending[i][index]++;
- }
+int
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ afr_local_t * local = NULL;
+ dict_t *xattr = NULL;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ local = frame->local;
+ idx = afr_index_for_transaction_type (local->transaction.type);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ nothing_failed = afr_txn_nothing_failed (frame, this);
+ if (afr_changelog_pre_op_uninherit (frame, this))
+ need_undirty = _gf_false;
+ else
+ need_undirty = _gf_true;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- break;
- }
+ if (nothing_failed && !need_undirty) {
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ xattr = dict_new ();
+ if (!xattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ if (need_undirty) {
+ local->dirty[idx] = hton32(-1);
+
+ ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ }
+
+ if (!nothing_failed) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict (priv, xattr, local->pending);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ }
+
+ afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done);
+out:
+ if (xattr)
+ dict_unref (xattr);
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
+ return 0;
+}
- if (piggyback && !nothing_failed)
- ret = afr_set_piggyback_dict (priv, xattr[i],
- local->pending,
- local->transaction.type);
- if (nothing_failed && piggyback) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- __mark_pre_op_undone_on_fd (frame, this, i);
- STACK_WIND_COOKIE (frame,
- afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+/*
+ * Undo, for this transaction, the pre-op accounting kept on the fd context.
+ *
+ * Return value:
+ *   - transaction is not a data transaction, or has no fd:
+ *         !local->transaction.dirtied
+ *   - no fd context, or local->transaction.no_uninherit is set: _gf_false
+ *   - called before for this transaction: the value returned then
+ *   - the pre_op set of the transaction differs from
+ *     fd_ctx->pre_op_done[type]: !local->transaction.dirtied
+ *   - fd_ctx->inherited[type] is non-zero: it is decremented, _gf_true
+ *   - otherwise fd_ctx->on_disk[type], if non-zero, is decremented and
+ *     _gf_false is returned
+ *
+ * Once both counters are zero, fd_ctx->pre_op_done[type][] is cleared.
+ * The counters are read and modified under fd->lock.
+ */
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local  = NULL;
+        afr_private_t *priv   = NULL;
+        fd_t          *fd     = NULL;
+        int            i      = 0;
+        gf_boolean_t   ret    = _gf_false;
+        afr_fd_ctx_t  *fd_ctx = NULL;
+        int            type   = 0;
+
+        local = frame->local;
+        priv  = this->private;
+        fd    = local->fd;
+
+        type = afr_index_for_transaction_type (local->transaction.type);
+
+        /* Compare the transaction type itself, not the xattr index derived
+           from it, against AFR_DATA_TRANSACTION (same check as in
+           afr_changelog_pre_op_inherit()). */
+        if (local->transaction.type != AFR_DATA_TRANSACTION)
+                return !local->transaction.dirtied;
+
+        if (!fd)
+                return !local->transaction.dirtied;
+
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (!fd_ctx)
+                return _gf_false;
+
+        if (local->transaction.no_uninherit)
+                return _gf_false;
+
+        /* This function must be idempotent. So check if we
+           were called before and return the same answer again.
+
+           It is important to keep this function idempotent for
+           the call in afr_changelog_post_op_safe() to not have
+           side effects on the call from afr_changelog_post_op_now()
+        */
+        if (local->transaction.uninherit_done)
+                return local->transaction.uninherit_value;
+
+        LOCK(&fd->lock);
+        {
+                for (i = 0; i < priv->child_count; i++) {
+                        if (local->transaction.pre_op[i] !=
+                            fd_ctx->pre_op_done[type][i]) {
+                                ret = !local->transaction.dirtied;
+                                goto unlock;
+                        }
+                }
+
+                if (fd_ctx->inherited[type]) {
+                        ret = _gf_true;
+                        fd_ctx->inherited[type]--;
+                } else if (fd_ctx->on_disk[type]) {
+                        ret = _gf_false;
+                        fd_ctx->on_disk[type]--;
+                } else {
+                        /* ASSERT */
+                        ret = _gf_false;
+                }
+
+                /* last user gone: forget the recorded pre-op set */
+                if (!fd_ctx->inherited[type] && !fd_ctx->on_disk[type]) {
+                        for (i = 0; i < priv->child_count; i++)
+                                fd_ctx->pre_op_done[type][i] = 0;
+                }
+        }
+unlock:
+        UNLOCK(&fd->lock);
+
+        /* remember the answer so that a repeated call returns it again */
+        local->transaction.uninherit_done = _gf_true;
+        local->transaction.uninherit_value = ret;
+
+        return ret;
+}
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- call_count--;
- }
+/*
+ * Try to inherit the pre-op already recorded on the fd context.
+ *
+ * Returns _gf_true, after incrementing fd_ctx->inherited[type] and setting
+ * local->transaction.inherited, only when all of the following hold:
+ *
+ *   - the transaction is a data transaction with an fd and an fd context,
+ *   - fd_ctx->on_disk[type] is non-zero (there is something to inherit),
+ *   - the pre_op set of the transaction equals fd_ctx->pre_op_done[type]
+ *     on every subvolume (inherit exactly, or not at all).
+ *
+ * Returns _gf_false, leaving everything untouched, otherwise.  The fd
+ * context is examined and updated under fd->lock.
+ */
+gf_boolean_t
+afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        fd_t          *fd          = local->fd;
+        afr_fd_ctx_t  *fd_ctx      = NULL;
+        gf_boolean_t   inheritable = _gf_false;
+        int            idx         = 0;
+        int            child       = 0;
+
+        if (local->transaction.type != AFR_DATA_TRANSACTION)
+                return _gf_false;
+
+        idx = afr_index_for_transaction_type (local->transaction.type);
+
+        if (!fd)
+                return _gf_false;
+
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (!fd_ctx)
+                return _gf_false;
+
+        LOCK(&fd->lock);
+        {
+                /* nothing on disk means nothing to inherit yet */
+                inheritable = fd_ctx->on_disk[idx] ? _gf_true : _gf_false;
+
+                /* the recorded pre-op set has to match exactly */
+                for (child = 0;
+                     inheritable && child < priv->child_count;
+                     child++) {
+                        if (local->transaction.pre_op[child] !=
+                            fd_ctx->pre_op_done[idx][child])
+                                inheritable = _gf_false;
+                }
+
+                if (inheritable) {
+                        fd_ctx->inherited[idx]++;
+                        local->transaction.inherited = _gf_true;
+                }
+        }
+        UNLOCK(&fd->lock);
+
+        return inheritable;
+}
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+/*
+ * Account on the fd context for a pre-op done by this transaction.
+ *
+ * Returns _gf_false without touching the fd context when the transaction
+ * has no fd or no fd context, when it already inherited a pre-op, when it
+ * did not dirty anything, or when something failed in the transaction.
+ *
+ * Otherwise, under fd->lock:
+ *   - if fd_ctx->on_disk[type] is zero, the pre_op set of the transaction
+ *     is copied into fd_ctx->pre_op_done[type];
+ *   - if it is non-zero, the two sets must match on every subvolume; on a
+ *     mismatch local->transaction.no_uninherit is set and _gf_false is
+ *     returned;
+ *   - fd_ctx->on_disk[type] is incremented and _gf_true is returned.
+ */
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local    = frame->local;
+        afr_private_t *priv     = this->private;
+        fd_t          *fd       = local->fd;
+        afr_fd_ctx_t  *fd_ctx   = NULL;
+        gf_boolean_t   updated  = _gf_false;
+        gf_boolean_t   mismatch = _gf_false;
+        int            idx      = 0;
+        int            child    = 0;
+
+        if (!fd)
+                return _gf_false;
+
+        fd_ctx = afr_fd_ctx_get (fd, this);
+        if (!fd_ctx)
+                return _gf_false;
+
+        /* was already inherited in afr_changelog_pre_op */
+        if (local->transaction.inherited)
+                return _gf_false;
+
+        if (!local->transaction.dirtied)
+                return _gf_false;
+
+        if (!afr_txn_nothing_failed (frame, this))
+                return _gf_false;
+
+        idx = afr_index_for_transaction_type (local->transaction.type);
+
+        LOCK(&fd->lock);
+        {
+                if (fd_ctx->on_disk[idx]) {
+                        /* a set is already recorded: it has to match */
+                        for (child = 0; child < priv->child_count; child++) {
+                                if (fd_ctx->pre_op_done[idx][child] !=
+                                    local->transaction.pre_op[child]) {
+                                        mismatch = _gf_true;
+                                        break;
+                                }
+                        }
+                } else {
+                        /* first one: record the set of this transaction */
+                        for (child = 0; child < priv->child_count; child++)
+                                fd_ctx->pre_op_done[idx][child] =
+                                        local->transaction.pre_op[child];
+                }
+
+                if (mismatch) {
+                        local->transaction.no_uninherit = 1;
+                } else {
+                        fd_ctx->on_disk[idx]++;
+                        updated = _gf_true;
+                }
+        }
+        UNLOCK(&fd->lock);
+
+        return updated;
+}
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
- /* fall through */
+int
+afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
- case AFR_ENTRY_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ local = frame->local;
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
+ if (op_ret == -1)
+ afr_transaction_fop_failed (frame, this, (long) cookie);
- if (!--call_count)
- break;
- }
+ call_count = afr_frame_return (frame);
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
+ if (call_count == 0)
+ local->transaction.changelog_resume (frame, this);
return 0;
}
-int32_t
-afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- switch (op_ret) {
- case 0:
- __mark_pre_op_done_on_fd (frame, this, child_index);
- //fallthrough we need to mark the pre_op
- case 1:
- local->transaction.pre_op[child_index] = 1;
- /* special op_ret for piggyback */
- break;
- case -1:
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
+ call_count = afr_changelog_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ changelog_resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->transaction.changelog_resume = changelog_resume;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
}
- local->op_errno = op_errno;
- break;
- }
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ call_count--;
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ /* fall through */
- afr_pid_restore (frame);
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ break;
+ }
- local->transaction.fop (frame, this);
- }
+ if (!--call_count)
+ break;
}
- return 0;
+ return 0;
}
+
int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
@@ -740,193 +816,122 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
int i = 0;
int ret = 0;
int call_count = 0;
- dict_t **xattr = NULL;
- afr_fd_ctx_t *fdctx = NULL;
+ int op_errno = 0;
afr_local_t *local = NULL;
- int piggyback = 0;
afr_internal_lock_t *int_lock = NULL;
unsigned char *locked_nodes = NULL;
+ unsigned char *pending_subvols = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
local = frame->local;
int_lock = &local->internal_lock;
-
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
-
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
- }
-
- call_count = afr_changelog_pre_op_call_count (local->transaction.type,
- int_lock,
- priv->child_count);
- if (call_count == 0) {
- local->internal_lock.lock_cbk =
- local->transaction.done;
- afr_unlock (frame, this);
- goto out;
- }
-
- local->call_count = call_count;
-
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
-
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ idx = afr_index_for_transaction_type (local->transaction.type);
locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_nodes[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- break;
- }
+ pending_subvols = alloca0 (priv->child_count);
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_done[i]) {
- fdctx->pre_op_piggyback[i]++;
- piggyback = 1;
- fdctx->hit++;
- } else {
- fdctx->miss++;
- }
- }
- UNLOCK (&local->fd->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++;
+ } else {
+ pending_subvols[i] = 1;
+ }
+ }
- if (piggyback)
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ /* TBD: quorum check w/ call_count */
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
-
- call_count--;
- }
-
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
- /* fall through */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ pre_nop = _gf_true;
+
+ if (afr_changelog_pre_op_inherit (frame, this))
+ goto next;
+
+ if (call_count < priv->child_count) {
+ /* For subvols we are not performing operation on,
+ mark them as pending up-front along with the FOP
+ so that we can safely defer unmarking dirty until
+ later.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict (priv, xdata_req,
+ local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ pre_nop = _gf_false;
+ }
+
+ if (call_count > 1 &&
+ (local->transaction.type == AFR_DATA_TRANSACTION ||
+ !local->optimistic_change_log)) {
+
+ /* If we are performing change on only one subvol, no
+ need to mark dirty, because we are setting the pending
+ counts already anyways
+ */
+ local->dirty[idx] = hton32(1);
+
+ ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1;
+ }
+
+ if (pre_nop)
+ goto next;
+
+ if (!local->pre_op_compat) {
+ dict_copy (xdata_req, local->xdata_req);
+ goto next;
+ }
+
+ afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ return 0;
+next:
+ afr_transaction_perform_fop (frame, this);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ return 0;
+err:
+ local->internal_lock.lock_cbk = local->transaction.done;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
+ afr_unlock (frame, this);
- if (!--call_count)
- break;
- }
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
+ if (xdata_req)
+ dict_unref (xdata_req);
- return 0;
+ return 0;
}
@@ -1075,12 +1080,14 @@ int
afr_set_transaction_flock (afr_local_t *local)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
return 0;
}
@@ -1095,6 +1102,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
@@ -1108,8 +1116,8 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -1131,12 +1139,6 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int
afr_lock (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
-
- frame->root->pid = (long) frame->root;
-
- afr_set_lk_owner (frame, this);
-
afr_set_lock_number (frame, this);
return afr_lock_rec (frame, this);
@@ -1148,47 +1150,427 @@ afr_lock (call_frame_t *frame, xlator_t *this)
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
if (__fop_changelog_needed (frame, this)) {
afr_changelog_pre_op (frame, this);
} else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t    *local = NULL;
+        afr_private_t  *priv = NULL;
+
+        /* Call this function when any of the related optimizations
+           which benefit from delaying the post-op are enabled, namely:
+
+           - changelog piggybacking
+           - eager locking
+
+           It sets local->delayed_post_op only when every precondition
+           below holds; otherwise it returns without changing anything.
+        */
+
+        priv = this->private;
+        if (!priv)
+                return;
+
+        /* with a zero post_op_delay_secs nothing is ever marked */
+        if (!priv->post_op_delay_secs)
+                return;
+
+        /* validate local BEFORE dereferencing it */
+        local = frame->local;
+        if (!local)
+                return;
+
+        if (!local->transaction.eager_lock_on)
+                return;
+
+        if (!local->fd)
+                return;
+
+        /* only writes are marked for a delayed post-op */
+        if (local->op == GF_FOP_WRITE)
+                local->delayed_post_op = _gf_true;
+}
+
+gf_boolean_t
+afr_are_multiple_fds_opened (fd_t *fd, xlator_t *this)
+{
+        afr_fd_ctx_t *ctx      = NULL;
+        gf_boolean_t  multiple = _gf_true;
+
+        /* Answers "is this file open through more than one fd?".
+         *
+         * Every failure path answers "yes": answering "no" could let the
+         * caller keep taking eager-lock, which may lead to starvation.
+         */
+        if (fd == NULL) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid fd");
+                return multiple;
+        }
+
+        /* Say mount1 holds the eager-lock (full-lock) and mount2 opens the
+         * same file afterwards: mount2 cannot perform any data operation
+         * until mount1 releases the eager-lock.  To avoid that, callers do
+         * not enable eager-lock for a transaction when open-fd-count > 1.
+         */
+        ctx = afr_fd_ctx_get (fd, this);
+        if (ctx != NULL)
+                multiple = (ctx->open_fd_count > 1) ? _gf_true : _gf_false;
+
+        return multiple;
+}
+
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *txn_local = frame->local;
+
+        /* Returns _gf_true only when the transaction was marked for a
+           delayed post-op AND nothing speaks against delaying it. */
+        if (txn_local == NULL || !txn_local->delayed_post_op)
+                return _gf_false;
+
+        /* Mark pending changelog ASAP: never delay after a failure */
+        if (!afr_txn_nothing_failed (frame, this))
+                return _gf_false;
+
+        if (txn_local->fd &&
+            afr_are_multiple_fds_opened (txn_local->fd, this))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+        /* @data is an fd_t pointer handed over as an opaque argument */
+        fd_t *delayed_fd = data;
+
+        afr_delayed_changelog_wake_up (THIS, delayed_fd);
+}
+
+
+/* SET operation: flag on @fd's afr context that an unstable write was seen.
+ *
+ * Always returns 0.  When no fd context can be obtained there is nowhere
+ * to record the flag and the call is a no-op.
+ */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t *fdctx = NULL;
+
+        fdctx = afr_fd_ctx_get (fd, this);
+        if (!fdctx)
+                /* Do not dereference a missing context.  Skipping is safe:
+                   afr_fd_has_witnessed_unstable_write() reports "witnessed"
+                   whenever the context is missing. */
+                return 0;
+
+        LOCK(&fd->lock);
+        {
+                fdctx->witnessed_unstable_write = _gf_true;
+        }
+        UNLOCK(&fd->lock);
+
+        return 0;
+}
+
+/* TEST and CLEAR operation: report whether an unstable write was flagged on
+ * @fd and reset the flag in the same critical section.
+ *
+ * A missing fd context is reported as "witnessed" (_gf_true).
+ */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t *ctx  = afr_fd_ctx_get (fd, this);
+        gf_boolean_t  seen = _gf_false;
+
+        if (ctx == NULL)
+                return _gf_true;
+
+        LOCK(&fd->lock);
+        {
+                if (ctx->witnessed_unstable_write) {
+                        ctx->witnessed_unstable_write = _gf_false;
+                        seen = _gf_true;
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        return seen;
+}
- afr_pid_restore (frame);
- local->transaction.fop (frame, this);
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno, struct iatt *pre,
+                         struct iatt *post, dict_t *xdata)
+{
+        afr_private_t *conf    = this->private;
+        afr_local_t   *local   = frame->local;
+        int            subvol  = (long) cookie;  /* child index of the wind */
+        int            pending = -1;
+
+        if (op_ret != 0) {
+                /* A failed fsync() is as good as a failure of the
+                   preceding write(), so it is accounted for as one. */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "fsync(%s) failed on subvolume %s. Transaction was %s",
+                        uuid_utoa (local->fd->inode->gfid),
+                        conf->children[subvol]->name,
+                        gf_fop_list[local->op]);
+
+                afr_transaction_fop_failed (frame, this, subvol);
+        }
+
+        /* only the last reply to come back continues with the post-op */
+        pending = afr_frame_return (frame);
+        if (pending != 0)
+                return 0;
+
+        afr_changelog_post_op_now (frame, this);
+
+        return 0;
+}
+
+
+/* Wind an fsync() to every child flagged in local->transaction.pre_op[];
+ * replies are handled by afr_changelog_fsync_cbk.  When no child is
+ * flagged the post-op is run immediately.  Always returns 0.
+ */
+int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ /* presumably the number of children set in pre_op[] -- confirm AFR_COUNT */
+ call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* number of replies the callback has to wait for */
+ local->call_count = call_count;
+
+ /* best effort: tag the request with "batch-fsync" = 1; if the dict
+    cannot be allocated the fsync is wound with a NULL xdata, and the
+    result of dict_set_int32() is deliberately not checked */
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ /* the child index travels as the cookie; the literal 1 is
+    presumably the datasync flag of fsync -- TODO confirm */
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ /* all expected winds issued: leave the loop right away */
+ if (!--call_count)
+ break;
}
+ /* drop our reference on the request dict */
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+ /* Run the changelog post-op, issuing an fsync() first when that is
+    needed to keep the changelog trustworthy (see the long note below).
+    Every path ends in afr_changelog_post_op_now(), either directly or
+    through afr_changelog_fsync().  Always returns 0. */
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ /* only fd-based data transactions are candidates for the fsync */
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* NOTE(review): afr_changelog_pre_op_uninherit() is evaluated first and
+    may have side effects; keep this evaluation order -- confirm */
+ if (afr_changelog_pre_op_uninherit (frame, this) &&
+ afr_txn_nothing_failed (frame, this)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation than what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need to take matters into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ /* test-and-clear: no unstable write recorded on this fd, no fsync needed */
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync (frame, this);
+ } else {
+ afr_changelog_post_op_now (frame, this);
+ }
+
+ return 0;
+}
+
+
+/* Park @frame's post-op on @fd behind a timer and flush whatever post-op
+ * was parked there before.
+ *
+ * @frame: transaction frame to park, or NULL to only flush the parked one.
+ * @stub:  optional stub; attached to the flushed frame's local as
+ *         transaction.resume_stub, or resumed at once when nothing was
+ *         parked.
+ */
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ /* NOTE(review): without an fd context a non-NULL @frame is neither
+    parked nor post-op'ed on this path (only @stub is resumed) --
+    confirm callers never get here with a frame in that case */
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
+ {
+ /* take over the previously parked frame and disarm its timer */
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ /* pure wake-up request: nothing new to park */
+ if (!frame)
+ goto unlock;
+ /* arm a fresh timer for the new frame; the fd is the callback data */
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
+ }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
+
+out:
+ /* the flushed frame is processed outside delay_lock */
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_now (prev_frame, this);
+ } else if (stub) {
+ /* nothing was parked: let the waiting stub continue right away */
+ call_resume (stub);
+ }
+}
+
+
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        /* Either run the post-op through the "safe" path right now, or
+           park it on the fd so that it is performed later. */
+        if (!is_afr_delayed_changelog_post_op_needed (frame, this)) {
+                afr_changelog_post_op_safe (frame, this);
+                return;
+        }
+
+        afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+}
+
+
+
+/* Wake up the post-op that is sleeping (delayed) on @fd and register
+   @stub so that it is resumed once that transaction has completely
+   finished.
+
+   The @stub gets saved in the woken transaction's @local and gets
+   resumed in afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+        /* no new frame is parked by a wake-up request */
+        call_frame_t *no_frame = NULL;
+
+        afr_delayed_changelog_post_op (this, no_frame, fd, stub);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+        /* neither a new frame to park nor a stub to resume: this only
+           wakes up whatever post-op is currently delayed on @fd */
+        call_frame_t *no_frame = NULL;
+        call_stub_t  *no_stub  = NULL;
+
+        afr_delayed_changelog_post_op (this, no_frame, fd, no_stub);
+}
+
+
+int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ afr_remove_eager_lock_stub (local);
+ }
+
+ afr_restore_lk_owner (frame);
+
+ afr_handle_symmetric_errors (frame, this);
+
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update (frame, this);
if (__fop_changelog_needed (frame, this)) {
afr_changelog_post_op (frame, this);
} else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
+ afr_changelog_post_op_done (frame, this);
}
return 0;
@@ -1200,16 +1582,96 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
local = frame->local;
- priv = this->private;
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ local->transaction.failed_subvols[child_index] = 1;
+}
+
+
+
+static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+        /* Byte ranges are [start, start + len - 1]; a zero len stands for
+           "up to ULLONG_MAX". */
+        uint64_t first_lo  = local1->transaction.start;
+        uint64_t second_lo = local2->transaction.start;
+        uint64_t first_hi  = ULLONG_MAX;
+        uint64_t second_hi = ULLONG_MAX;
+
+        if (local1->transaction.len)
+                first_hi = first_lo + local1->transaction.len - 1;
+
+        if (local2->transaction.len)
+                second_hi = second_lo + local2->transaction.len - 1;
+
+        /* two ranges are disjoint iff one ends before the other begins */
+        return !((first_hi < second_lo) || (second_hi < first_lo));
+}
+
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+        afr_private_t *priv        = this->private;
+        afr_fd_ctx_t  *fdctx       = NULL;
+        afr_local_t   *other       = NULL;
+        gf_boolean_t   overlapping = _gf_false;
+
+        /* eager-lock is only considered for fd-based data transactions
+           and only when the option is switched on */
+        if (!local->fd ||
+            local->transaction.type != AFR_DATA_TRANSACTION ||
+            !priv->eager_lock)
+                return;
+
+        fdctx = afr_fd_ctx_get (local->fd, this);
+        if (!fdctx || afr_are_multiple_fds_opened (local->fd, this))
+                return;
+
+        /*
+         * Once the full file lock is acquired in the eager-lock phase,
+         * overlapping writes do not compete for inode-locks; the lock is
+         * instead transferred to the next writes.  Overlapping writes are
+         * therefore not ordered, which can cause inconsistencies in
+         * replication.
+         * Example:
+         * Two overlapping writes w1, w2 are sent in parallel on the same fd
+         * in two threads t1, t2.
+         * Both threads can execute afr_writev_wind in the following manner.
+         * t1 winds w1 on brick-0
+         * t2 winds w2 on brick-0
+         * t2 winds w2 on brick-1
+         * t1 winds w1 on brick-1
+         *
+         * The scan below makes sure the locks are not transferred for
+         * overlapping writes.
+         */
+        LOCK (&local->fd->lock);
+        {
+                list_for_each_entry (other, &fdctx->eager_locked,
+                                     transaction.eager_locked) {
+                        if (afr_locals_overlap (other, local)) {
+                                overlapping = _gf_true;
+                                break;
+                        }
+                }
+
+                if (overlapping) {
+                        local->transaction.eager_lock_on = _gf_false;
+                } else {
+                        /* no conflict: join the fd's eager-locked list */
+                        local->transaction.eager_lock_on = _gf_true;
+                        list_add_tail (&local->transaction.eager_locked,
+                                       &fdctx->eager_locked);
+                }
+        }
+        UNLOCK (&local->fd->lock);
+}
@@ -1218,20 +1680,43 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
local = frame->local;
priv = this->private;
- afr_transaction_local_init (local, this);
-
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
+
+ afr_transaction_eager_lock_init (local, this);
+
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
+
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
+
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
afr_internal_lock_finish (frame, this);
} else {
afr_lock (frame, this);
}
-
- return 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 10f274fec..77cc8eed0 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,25 +1,18 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+#include "afr.h"
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
@@ -27,9 +20,34 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
+
+int afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
+
+int afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol);
+
+int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
+int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
+call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
+
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 30da3fc72..5e12910b7 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -37,8 +28,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -85,29 +81,42 @@ xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
return index;
}
-
-int
-xlator_subvolume_count (xlator_t *this)
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
{
- int i = 0;
- xlator_list_t *list = NULL;
-
- for (list = this->children; list; list = list->next)
- i++;
- return i;
+ if (priv->quorum_count && strcmp(qtype,"fixed")) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
+ }
+ if (!strcmp(qtype,"none")) {
+ priv->quorum_count = 0;
+ }
+ else if (!strcmp(qtype,"auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
}
-
int
reconfigure (xlator_t *this, dict_t *options)
{
- afr_private_t * priv = NULL;
- xlator_t * read_subvol = NULL;
- int ret = -1;
- int index = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
priv = this->private;
+ GF_OPTION_RECONF ("afr-dirty-xattr",
+ priv->afr_dirty, options, str,
+ out);
+
+ GF_OPTION_RECONF ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool,
+ out);
+
GF_OPTION_RECONF ("background-self-heal-count",
priv->background_self_heal_count, options, uint32,
out);
@@ -121,9 +130,6 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
bool, out);
- GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
- out);
-
GF_OPTION_RECONF ("data-self-heal-window-size",
priv->data_self_heal_window_size, options,
uint32, out);
@@ -140,10 +146,11 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("data-self-heal-algorithm",
priv->data_self_heal_algorithm, options, str, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
-
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
+
if (read_subvol) {
index = xlator_subvolume_index (this, read_subvol);
if (index == -1) {
@@ -154,6 +161,43 @@ reconfigure (xlator_t *this, dict_t *options)
priv->read_child = index;
}
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
+
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, out);
+
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* The did_discovery reset at the end of this function makes us re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("iam-self-heal-daemon", priv->shd.iamshd, options,
+ bool, out);
+
+ priv->did_discovery = _gf_false;
+
ret = 0;
out:
return ret;
@@ -173,15 +217,16 @@ static const char *favorite_child_warning_str = "You have specified subvolume '%
int32_t
init (xlator_t *this)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t * read_subvol = NULL;
- xlator_t * fav_child = NULL;
-
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
@@ -195,12 +240,25 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+
+ child_count = xlator_subvolume_count (this);
+
+ priv->child_count = child_count;
priv->read_child = -1;
+ GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
if (read_subvol) {
priv->read_child = xlator_subvolume_index (this, read_subvol);
@@ -210,6 +268,18 @@ init (xlator_t *this)
goto out;
}
}
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
+ }
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
+
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
priv->favorite_child = -1;
GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
@@ -242,8 +312,6 @@ init (xlator_t *this)
GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
-
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -258,16 +326,24 @@ init (xlator_t *this)
GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
- priv->wait_count = 1;
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
+ fix_quorum_options(this,priv,qtype);
- child_count = xlator_subvolume_count (this);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
- priv->child_count = child_count;
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+
+ priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
@@ -307,8 +383,6 @@ init (xlator_t *this)
AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
ret = -ENOMEM;
goto out;
}
@@ -317,6 +391,13 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
gf_afr_mt_int32_t);
if (!priv->last_event) {
@@ -324,20 +405,23 @@ init (xlator_t *this)
goto out;
}
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_afr_brick_pos_t);
- if (!priv->shd.pos) {
- ret = -ENOMEM;
+ ret = afr_selfheal_daemon_init (this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
goto out;
}
- LOCK_INIT (&priv->root_inode_lk);
- priv->first_lookup = 1;
priv->root_inode = NULL;
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
-
ret = 0;
out:
return ret;
@@ -347,6 +431,13 @@ out:
int
fini (xlator_t *this)
{
+        afr_private_t *priv = NULL;
+
+        /* Detach the private state from the xlator first, then hand it
+           to afr_priv_destroy().  Always returns 0. */
+        priv = this->private;
+        this->private = NULL;
+        afr_priv_destroy (priv);
+
+        /* NOTE: this->itable is intentionally left untouched here; no
+           destroy function for it was found. */
+
return 0;
}
@@ -365,6 +456,9 @@ struct xlator_fops fops = {
.finodelk = afr_finodelk,
.entrylk = afr_entrylk,
.fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
/* inode read */
.access = afr_access,
@@ -372,6 +466,7 @@ struct xlator_fops fops = {
.fstat = afr_fstat,
.readlink = afr_readlink,
.getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
.readv = afr_readv,
/* inode write */
@@ -379,9 +474,11 @@ struct xlator_fops fops = {
.truncate = afr_truncate,
.ftruncate = afr_ftruncate,
.setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
.setattr = afr_setattr,
.fsetattr = afr_fsetattr,
.removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
/* dir read */
.opendir = afr_opendir,
@@ -414,33 +511,79 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{ .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+        { .key   = {"read-hash-mode" },
+          .type  = GF_OPTION_TYPE_INT,
+          .min   = 0,
+          .max   = 2,
+          .default_value = "1",
+          /* Adjacent string literals are concatenated as-is, so every
+             fragment has to carry its own trailing separator. */
+          .description = "inode-read fops happen only on one of the bricks in "
+                         "replicate. AFR will prefer the one computed using "
+                         "the method specified using this option. "
+                         "0 = first up server, "
+                         "1 = hash by GFID of file (all clients use "
+                         "same subvolume), "
+                         "2 = hash by GFID of file and client PID",
+        },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
},
{ .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
},
{ .key = {"background-self-heal-count"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
.default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
},
{ .key = {"data-self-heal"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.value = {"1", "on", "yes", "true", "enable",
"0", "off", "no", "false", "disable",
"open"},
.default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
},
{ .key = {"data-self-heal-algorithm"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.description = "Select between \"full\", \"diff\". The "
"\"full\" algorithm copies the entire file from "
"source to sink. The \"diff\" algorithm copies to "
"sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
},
{ .key = {"data-self-heal-window-size"},
.type = GF_OPTION_TYPE_INT,
@@ -453,42 +596,154 @@ struct volume_options options[] = {
{ .key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
},
{ .key = {"entry-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
},
{ .key = {"data-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"metadata-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"entry-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"optimistic-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
- },
- { .key = {"strict-readdir"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
},
{ .key = {"inodelk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
},
{ .key = {"entrylk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks or present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
},
+ { .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ { .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 7320c8d7c..36042f7b2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -29,78 +20,42 @@
#include "call-stub.h"
#include "compat-errno.h"
#include "afr-mem-types.h"
-#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
+#include "timer.h"
+#include "syncop.h"
+
+#include "afr-self-heald.h"
#define AFR_XATTR_PREFIX "trusted.afr"
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
+#define AFR_DIRTY (((afr_private_t *) (THIS->private))->afr_dirty)
-struct _pump_private;
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
-typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
-typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
+typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol);
-typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int err);
-typedef enum {
- AFR_POS_UNKNOWN,
- AFR_POS_LOCAL,
- AFR_POS_REMOTE
-} afr_child_pos_t;
+typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
-typedef enum {
- AFR_INODE_SET_READ_CTX = 1,
- AFR_INODE_RM_STALE_CHILDREN,
- AFR_INODE_SET_OPENDIR_DONE,
- AFR_INODE_SET_SPLIT_BRAIN,
- AFR_INODE_GET_READ_CTX,
- AFR_INODE_GET_OPENDIR_DONE,
- AFR_INODE_GET_SPLIT_BRAIN,
-} afr_inode_op_t;
-
-typedef struct afr_inode_params_ {
- afr_inode_op_t op;
- union {
- gf_boolean_t value;
- struct {
- int32_t read_child;
- int32_t *children;
- } read_ctx;
- } u;
-} afr_inode_params_t;
-
-typedef struct afr_inode_ctx_ {
- uint64_t masks;
- int32_t *fresh_children;//increasing order of latency
-} afr_inode_ctx_t;
-
-typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t pending;
- gf_boolean_t inprogress;
- afr_child_pos_t *pos;
-} afr_self_heald_t;
+#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr;})
+#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
+#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
-
xlator_t **children;
- gf_lock_t root_inode_lk;
- int first_lookup;
inode_t *root_inode;
unsigned char *child_up;
@@ -121,145 +76,54 @@ typedef struct _afr_private {
gf_boolean_t metadata_change_log; /* on/off */
gf_boolean_t entry_change_log; /* on/off */
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
-
gf_boolean_t inodelk_trace;
gf_boolean_t entrylk_trace;
- gf_boolean_t strict_readdir;
-
unsigned int wait_count; /* # of servers to wait for success */
uint64_t up_count; /* number of CHILD_UPs we have seen */
uint64_t down_count; /* number of CHILD_DOWNs we have seen */
- struct _pump_private *pump_private; /* Set if we are loaded as pump */
- int use_afr_in_pump;
-
- pthread_mutex_t mutex;
- struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
- afr_self_heald_t shd;
-} afr_private_t;
-
-typedef struct {
- /* External interface: These are variables (some optional) that
- are set by whoever has triggered self-heal */
-
- gf_boolean_t do_data_self_heal;
- gf_boolean_t do_metadata_self_heal;
- gf_boolean_t do_entry_self_heal;
- gf_boolean_t do_gfid_self_heal;
- gf_boolean_t do_missing_entry_self_heal;
-
- gf_boolean_t forced_merge; /* Is this a self-heal triggered to
- forcibly merge the directories? */
-
- gf_boolean_t background; /* do self-heal in background
- if possible */
- ia_type_t type; /* st_mode of the entry we're doing
- self-heal on */
- inode_t *inode; /* inode on which the self-heal is
- performed on */
- uuid_t sh_gfid_req; /* gfid self-heal needs to be done
- with this gfid if it is not null */
-
- /* Function to call to unwind. If self-heal is being done in the
- background, this function will be called as soon as possible. */
-
- int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno);
-
- /* End of external interface members */
-
-
- /* array of stat's, one for each child */
- struct iatt *buf;
- struct iatt *parentbufs;
- struct iatt parentbuf;
- struct iatt entrybuf;
-
- afr_expunge_done_cbk_t expunge_done;
- afr_impunge_done_cbk_t impunge_done;
- int32_t impunge_ret_child;
-
- /* array of xattr's, one for each child */
- dict_t **xattr;
-
- /* array containing if the lookups succeeded in the order of response
- */
- int32_t *success_children;
- int success_count;
- /* array containing the fresh children found in the self-heal process */
- int32_t *fresh_children;
- /* array containing the fresh children found in the parent lookup */
- int32_t *fresh_parent_dirs;
- /* array of errno's, one for each child */
- int *child_errno;
- /*loc used for lookup*/
- loc_t lookup_loc;
- int32_t lookup_flags;
- afr_lookup_done_cbk_t lookup_done;
-
- int32_t **pending_matrix;
- int32_t **delta_matrix;
-
- int32_t op_ret;
- int32_t op_errno;
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- unsigned char *success;
- unsigned char *locked_nodes;
- int lock_count;
-
- mode_t impunging_entry_mode;
- const char *linkname;
-
- int op_failed;
-
- gf_boolean_t data_lock_held;
- gf_boolean_t eof_reached;
- fd_t *healing_fd;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- afr_post_remove_call_t post_remove_call;
-
- loc_t parent_loc;
-
- call_frame_t *orig_frame;
- call_frame_t *old_loop_frame;
- gf_boolean_t unwound;
-
- afr_sh_algo_private_t *private;
-
- afr_lock_cbk_t data_lock_success_handler;
- afr_lock_cbk_t data_lock_failure_handler;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
- int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
- afr_lock_cbk_t loop_completion_cbk;
- int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
+ char *afr_dirty;
+
+ afr_self_heald_t shd;
+
+ /* pump dependencies */
+ void *pump_private;
+ gf_boolean_t use_afr_in_pump;
+} afr_private_t;
- call_frame_t *sh_frame;
-} afr_self_heal_t;
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */
@@ -321,11 +185,31 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_entry_lockee_t;
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
+
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
typedef struct {
loc_t *lk_loc;
- struct gf_flock lk_flock;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
@@ -334,87 +218,203 @@ typedef struct {
unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
selfheal_lk_type_t selfheal_lk_type;
transaction_lk_type_t transaction_lk_type;
int32_t lock_count;
- int32_t inodelk_lock_count;
int32_t entrylk_lock_count;
uint64_t lock_number;
int32_t lk_call_count;
int32_t lk_expected_count;
+ int32_t lk_attempted_count;
int32_t lock_op_ret;
int32_t lock_op_errno;
afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
} afr_internal_lock_t;
-typedef struct _afr_locked_fd {
- fd_t *fd;
- struct list_head list;
-} afr_locked_fd_t;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ uint8_t checksum[MD5_DIGEST_LENGTH];
+};
+
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
+
+ int flags;
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
+} afr_fd_ctx_t;
+
typedef struct _afr_local {
- int uid;
- int gid;
+ glusterfs_fop_t op;
unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- unsigned int govinda_gOvinda;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- unsigned int read_child_index;
- unsigned char read_child_returned;
- unsigned int first_up_child;
-
- pid_t saved_pid;
+ gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
int32_t **pending;
+ int dirty[AFR_NUM_CHANGE_LOGS];
+
loc_t loc;
loc_t newloc;
fd_t *fd;
- unsigned char *fd_open_on;
-
- glusterfs_fop_t fop;
+ afr_fd_ctx_t *fd_ctx;
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
unsigned char *child_up;
- int32_t *fresh_children; //in the order of response
- int32_t *child_errno;
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- dict_t *xattr_req;
+ /* @readfn:
- int32_t inodelk_count;
- int32_t entrylk_count;
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- afr_internal_lock_t internal_lock;
+ afr_read_txn_wind_t readfn;
+
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ /* @inode:
+
+ the inode on which the read txn is performed. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
+
+ inode_t *inode;
+
+ /* @parent[2]:
+
+ parent inode[s] on which directory transactions are performed.
+ */
+
+ inode_t *parent;
+ inode_t *parent2;
+
+ /* @readable:
+
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
- afr_locked_fd_t *locked_fd;
- int32_t source_child;
- int32_t lock_recovery_child;
+ afr_inode_refresh_cbk_t refreshfn;
+
+ /* @refreshinode:
+
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
+
+ /*
+ @pre_op_compat:
+
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
+
+ gf_boolean_t pre_op_compat;
+
+ dict_t *xattr_req;
+
+ afr_internal_lock_t internal_lock;
dict_t *dict;
+
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
+
+ /* Is the current writev() going to perform a stable write?
+ i.e, does fd->flags or the @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
- gf_boolean_t fop_paused;
- int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
+ /* This write appended to the file. Not necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
/*
This struct contains the arguments for the "continuation"
(scheme-like) of fops
*/
- int op;
struct {
struct {
unsigned char buf_set;
@@ -422,25 +422,7 @@ typedef struct _afr_local {
} statfs;
struct {
- uuid_t gfid_req;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- dict_t **xattrs;
- dict_t *xattr;
- struct iatt *postparents;
- struct iatt *bufs;
- int32_t read_child;
- int32_t *sources;
- int32_t *success_children;
- } lookup;
-
- struct {
int32_t flags;
- int32_t wbflags;
} open;
struct {
@@ -459,31 +441,28 @@ typedef struct _afr_local {
struct {
int last_index;
- ino_t ino;
} stat;
struct {
int last_index;
- ino_t ino;
} fstat;
struct {
size_t size;
int last_index;
- ino_t ino;
} readlink;
struct {
char *name;
int last_index;
- long pathinfo_len;
+ long xattr_len;
} getxattr;
struct {
- ino_t ino;
size_t size;
off_t offset;
int last_index;
+ uint32_t flags;
} readv;
/* dir read */
@@ -501,59 +480,43 @@ typedef struct _afr_local {
int32_t op_errno;
size_t size;
off_t offset;
-
+ dict_t *dict;
gf_boolean_t failed;
int last_index;
} readdir;
/* inode write */
struct {
- ino_t ino;
struct iatt prebuf;
struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+ struct {
int32_t op_ret;
struct iovec *vector;
struct iobref *iobref;
int32_t count;
off_t offset;
+ uint32_t flags;
} writev;
struct {
- ino_t ino;
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
- ino_t ino;
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} truncate;
struct {
- ino_t ino;
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} ftruncate;
struct {
- ino_t ino;
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} setattr;
struct {
- ino_t ino;
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} fsetattr;
struct {
@@ -562,116 +525,85 @@ typedef struct _afr_local {
} setxattr;
struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
char *name;
} removexattr;
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
/* dir write */
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
inode_t *inode;
struct iatt buf;
struct iatt preparent;
struct iatt postparent;
- struct iatt read_child_buf;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
} create;
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
dev_t dev;
mode_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
} mknod;
struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
int32_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
} mkdir;
struct {
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- ino_t parent_ino;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
+ int flags;
} rmdir;
struct {
- ino_t oldparent_ino;
- ino_t newparent_ino;
- ino_t ino;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- ino_t ino;
- uint64_t gen;
- ino_t parent_ino;
- inode_t *inode;
dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
} symlink;
+ struct {
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
+
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
+
struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
+
+
} cont;
struct {
off_t start, len;
+ gf_boolean_t eager_lock_on;
int *eager_lock;
char *basename;
@@ -682,16 +614,67 @@ typedef struct _afr_local {
afr_transaction_type type;
- int success_count;
- int erase_pending;
- int failure_count;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
+
+ struct list_head eager_locked;
- int last_tried;
- int32_t *child_errno;
unsigned char *pre_op;
+ /* @fop_subvols: subvolumes on which FOP will be attempted */
+ unsigned char *fop_subvols;
+
+ /* @failed_subvols: subvolumes on which FOP failed. Always
+ a subset of @fop_subvols */
+ unsigned char *failed_subvols;
+
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
+
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
+
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
+
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
+
+ /* @uninherit_done:
+ @uninherit_value:
+
+ The above pair of variables makes pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
+
+ /* @changelog_resume: function to be called after changelogging
+ (either pre-op or post-op) is done
+ */
+
+ afr_changelog_resume_t changelog_resume;
+
call_frame_t *main_frame;
+ int (*wind) (call_frame_t *frame, xlator_t *this, int subvol);
+
int (*fop) (call_frame_t *frame, xlator_t *this);
int (*done) (call_frame_t *frame, xlator_t *this);
@@ -703,99 +686,96 @@ typedef struct _afr_local {
/* post-op hook */
} transaction;
- afr_self_heal_t self_heal;
+ syncbarrier_t barrier;
struct marker_str marker;
-} afr_local_t;
-
-typedef enum {
- AFR_FD_NOT_OPENED,
- AFR_FD_OPENED,
- AFR_FD_OPENING
-} afr_fd_open_status_t;
-typedef struct {
- struct list_head call_list;
- call_frame_t *frame;
-} afr_fd_paused_call_t;
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
-typedef struct {
- unsigned int *pre_op_done;
- afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
- unsigned int *pre_op_piggyback;
-
- unsigned int *lock_piggyback;
- unsigned int *lock_acquired;
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
+} afr_local_t;
- int flags;
- int32_t wbflags;
- uint64_t up_count; /* number of CHILD_UPs this fd has seen */
- uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
- int32_t last_tried;
+/* did a call fail due to a child failing? */
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
+ ((op_errno == ENOTCONN) || \
+ (op_errno == EBADFD)))
- int hit, miss;
- gf_boolean_t failed_over;
- struct list_head entries; /* needed for readdir failover */
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
- unsigned char *locked_on; /* which subvolumes locks have been successful */
- struct list_head paused_calls; /* queued calls while fix_open happens */
-} afr_fd_ctx_t;
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
-/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_afr_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type);
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ int *event_p, afr_transaction_type type);
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+#define afr_data_subvol_get(i, t, s, e) \
+ afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION)
-#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
+#define afr_metadata_subvol_get(i, t, s, e) \
+ afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION)
-/* have we tried all children? */
-#define all_tried(i, count) ((i) == (count) - 1)
+int
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_inode_refresh_cbk_t cbk);
int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
int
-pump_command_reply (call_frame_t *frame, xlator_t *this);
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
-int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
int
-afr_save_locked_fd (xlator_t *this, fd_t *fd);
-
-int
afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
unsigned char *locked_nodes);
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
afr_set_lock_number (call_frame_t *frame, xlator_t *this);
-
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this);
@@ -811,40 +791,30 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this);
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
-int pump_start (call_frame_t *frame, xlator_t *this);
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
-
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children);
-
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
-
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count);
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count);
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode);
void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
-
-int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_replies_wipe (afr_local_t *local, afr_private_t *priv);
void
afr_local_cleanup (afr_local_t *local, xlator_t *this);
@@ -852,21 +822,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
int
afr_frame_return (call_frame_t *frame);
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode);
-
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
-
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
-
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode);
-
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode);
+ fd_t *fd, dict_t *xdata);
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
@@ -874,9 +832,6 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
-int
-afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
-
#define AFR_STACK_UNWIND(fop, frame, params ...) \
do { \
afr_local_t *__local = NULL; \
@@ -887,22 +842,36 @@ afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
- } while (0);
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
} while (0);
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({frame->local = mem_get0 (THIS->local_pool); \
+ if (afr_local_init (frame->local, THIS->private, &op_errno)) { \
+ afr_local_cleanup (frame->local, THIS); \
+ mem_put (frame->local); \
+ frame->local = NULL; }; \
+ frame->local;})
+
+#define AFR_STACK_RESET(frame) do { int opr; STACK_RESET (frame->root); AFR_FRAME_INIT(frame, opr);} while (0)
+
/* allocate and return a string that is the basename of argument */
static inline char *
AFR_BASENAME (const char *str)
@@ -915,6 +884,9 @@ AFR_BASENAME (const char *str)
return __basename_str;
}
+call_frame_t *
+afr_copy_frame (call_frame_t *base);
+
int
afr_transaction_local_init (afr_local_t *local, xlator_t *this);
@@ -922,112 +894,83 @@ int32_t
afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
-int32_t *
-afr_children_create (int32_t child_count);
-
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
transaction_lk_type_t lk_type);
int
-afr_first_up_child (unsigned char *child_up, size_t child_count);
+afr_higher_errno (int32_t old_errno, int32_t new_errno);
int
-afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources);
+afr_final_errno (afr_local_t *local, afr_private_t *priv);
+
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req);
void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child);
+afr_fix_open (fd_t *fd, xlator_t *this);
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child);
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *children, unsigned int child_count);
void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count);
-void
-afr_children_rm_child (int32_t *children, int32_t child,
- int32_t child_count);
-void
-afr_reset_children (int32_t *children, int32_t child_count);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
-int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno);
+afr_set_low_priority (call_frame_t *frame);
int
-afr_get_children_count (int32_t *children, unsigned int child_count);
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child);
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count);
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count);
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags);
+
gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name);
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *children,
- struct iatt *bufs, unsigned int child_count,
- const char *path);
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count);
+afr_have_quorum (char *logname, afr_private_t *priv);
+
void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *stale_children);
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno));
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open);
-int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
-int
-afr_set_elem_count_get (unsigned char *elems, int child_count);
+afr_filter_xattrs (dict_t *xattr);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal);
+/*
+ * Having this as a macro will make debugging a bit weirder, but does reduce
+ * the probability of functions handling this check inconsistently.
+ */
+#define QUORUM_CHECK(_func,_label) do { \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
+} while (0);
+
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal);
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
void
-afr_set_low_priority (call_frame_t *frame);
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wb_flags);
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *is_local);
+
+void
+afr_remove_eager_lock_stub (afr_local_t *local);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index dbf86b0a2..eed509956 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -28,8 +20,130 @@
#include "afr-common.c"
#include "defaults.c"
+#include "glusterfs.h"
+#include "pump.h"
+
+
+static int
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+ int ret = 0;
+ uuid_t *pgfid = NULL;
+
+ GF_ASSERT (gfid);
+
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ if (ret && pgfid)
+ GF_FREE (pgfid);
+ return ret;
+}
+
+static int
+afr_set_root_gfid (dict_t *dict)
+{
+ uuid_t gfid;
+ int ret = 0;
+
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+
+ ret = afr_set_dict_gfid (dict, gfid);
+
+ return ret;
+}
+
+static int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+ int ret = -1;
+ uuid_t pargfid = {0};
+
+ if (!child)
+ goto out;
+
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
+
+ if (uuid_is_null (pargfid))
+ goto out;
+
+ if (strcmp (parent->path, "/") == 0)
+ ret = gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+ name);
+
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting child path");
+ }
+
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
+
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if ((ret == -1) && child)
+ loc_wipe (child);
+
+ return ret;
+}
+
+static void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->path = gf_strdup ("/");
+ loc->name = "";
+ loc->inode = inode_ref (priv->root_inode);
+ uuid_copy (loc->gfid, loc->inode->gfid);
+}
+
+static void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
+}
static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
static int
pump_mark_start_pending (xlator_t *this)
{
@@ -140,9 +254,7 @@ pump_set_resume_path (xlator_t *this, const char *path)
LOCK (&pump_priv->resume_path_lock);
{
- pump_priv->resume_path = strdup (path);
- if (!pump_priv->resume_path)
- ret = -1;
+ strncpy (pump_priv->resume_path, path, strlen (path) + 1);
}
UNLOCK (&pump_priv->resume_path_lock);
@@ -167,25 +279,27 @@ pump_save_path (xlator_t *this, const char *path)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s", path, PUMP_PATH);
ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"setxattr failed - could not save path=%s", path);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"setxattr succeeded - saved path=%s", path);
- gf_log (this->name, GF_LOG_DEBUG,
- "Saving path for status info");
}
dict_unref (dict);
+ loc_wipe (&loc);
return 0;
}
@@ -248,13 +362,9 @@ pump_get_resume_path (xlator_t *this)
static int
pump_update_resume_state (xlator_t *this, const char *path)
{
- afr_private_t *priv = NULL;
-
pump_state_t state;
const char *resume_path = NULL;
- priv = this->private;
-
state = pump_get_state ();
if (state == PUMP_STATE_RESUME) {
@@ -326,22 +436,21 @@ pump_save_file_stats (xlator_t *this, const char *path)
}
static int
-gf_pump_traverse_directory (loc_t *loc, uuid_t gfid)
-{
- xlator_t *this = NULL;
- fd_t *fd = NULL;
-
- off_t offset = 0;
- loc_t entry_loc;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t entries;
-
- struct iatt iatt, parent;
- dict_t *xattr_rsp;
-
- int ret = 0;
- gf_boolean_t is_directory_empty = _gf_true;
+gf_pump_traverse_directory (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ loc_t entry_loc = {0};
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ dict_t *xattr_rsp = NULL;
+ int ret = 0;
+ gf_boolean_t is_directory_empty = _gf_true;
+ gf_boolean_t free_entries = _gf_false;
INIT_LIST_HEAD (&entries.list);
this = THIS;
@@ -366,7 +475,8 @@ gf_pump_traverse_directory (loc_t *loc, uuid_t gfid)
"pump opendir on %s returned=%d",
loc->path, ret);
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
+ free_entries = _gf_true;
if (list_empty (&entries.list)) {
gf_log (this->name, GF_LOG_TRACE,
@@ -378,89 +488,101 @@ gf_pump_traverse_directory (loc_t *loc, uuid_t gfid)
gf_log (this->name, GF_LOG_DEBUG,
"found readdir entry=%s", entry->d_name);
+ offset = entry->d_off;
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ loc->path, entry->d_name);
+ continue;
+ }
+ loc_wipe (&entry_loc);
ret = afr_build_child_loc (this, &entry_loc, loc,
entry->d_name);
if (ret)
goto out;
- if (!IS_ENTRY_CWD (entry->d_name) &&
- !IS_ENTRY_PARENT (entry->d_name)) {
-
- is_directory_empty = _gf_false;
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
- if (ret)
- continue;
-
- if (uuid_is_null (iatt.ia_gfid)) {
- uuid_generate (gfid);
- uuid_copy (entry_loc.inode->gfid,
- gfid);
- } else {
- uuid_copy (entry_loc.inode->gfid,
- iatt.ia_gfid);
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup %s => %"PRId64,
- entry_loc.path,
- iatt.ia_ino);
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
-
-
- gf_log (this->name, GF_LOG_DEBUG,
- "second lookup ret=%d: %s => %"PRId64,
- ret,
- entry_loc.path,
- iatt.ia_ino);
-
- pump_update_resume_state (this, entry_loc.path);
-
- pump_save_path (this, entry_loc.path);
- pump_save_file_stats (this, entry_loc.path);
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump beginning to exit out");
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "type of file=%d, IFDIR=%d",
- iatt.ia_type, IA_IFDIR);
-
- if (IA_ISDIR (iatt.ia_type)) {
- if (is_pump_traversal_allowed (this, entry_loc.path)) {
- gf_log (this->name, GF_LOG_TRACE,
- "entering dir=%s",
- entry->d_name);
- gf_pump_traverse_directory (&entry_loc, gfid);
- }
- }
- }
- offset = entry->d_off;
- loc_wipe (&entry_loc);
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ is_directory_empty = _gf_false;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = afr_selfheal_name (this, loc->gfid, entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: name self-heal failed (%s/%s)",
+ entry_loc.path, uuid_utoa (loc->gfid),
+ entry->d_name);
+ continue;
+ }
+
+ ret = afr_selfheal (this, iatt.ia_gfid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: self-heal failed (%s)",
+ entry_loc.path, uuid_utoa (iatt.ia_gfid));
+ continue;
+ }
+
+ pump_fill_loc_info (&entry_loc, &iatt, &parent);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this, entry_loc.path);
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "entering dir=%s", entry->d_name);
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
}
gf_dirent_free (&entries);
- gf_log (this->name, GF_LOG_TRACE,
- "offset incremented to %d",
+ free_entries = _gf_false;
+ gf_log (this->name, GF_LOG_TRACE, "offset incremented to %d",
(int32_t ) offset);
}
- if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
+ if (is_directory_empty && (strcmp (loc->path, "/") == 0)) {
pump_change_state (this, PUMP_STATE_RUNNING);
gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
"Nothing to be done.");
}
out:
+ if (entry_loc.path)
+ loc_wipe (&entry_loc);
+ if (free_entries)
+ gf_dirent_free (&entries);
return 0;
-
}
static int
@@ -488,7 +610,7 @@ pump_update_resume_path (xlator_t *this)
static int32_t
pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t *priv = NULL;
loc_t loc = {0};
@@ -499,22 +621,24 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
- PUMP_PATH);
+ PUMP_PATH, 0);
ret = syncop_removexattr (priv->children[sink], &loc,
- PUMP_SINK_COMPLETE);
+ PUMP_SINK_COMPLETE, 0);
for (i = 0; i < priv->child_count; i++) {
ret = syncop_removexattr (priv->children[i], &loc,
- PUMP_SOURCE_COMPLETE);
- if (ret)
+ PUMP_SOURCE_COMPLETE, 0);
+ if (ret) {
gf_log (this->name, GF_LOG_DEBUG, "removexattr "
- "failed with %s", strerror (errno));
+ "failed with %s", strerror (-ret));
+ }
}
+ loc_wipe (&loc);
return pump_command_reply (frame, this);
}
@@ -534,7 +658,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
@@ -546,6 +670,10 @@ pump_complete_migration (xlator_t *this)
pump_priv->pump_finished = _gf_true;
dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SOURCE_COMPLETE);
ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
if (ret < 0) {
@@ -553,6 +681,10 @@ pump_complete_migration (xlator_t *this)
"setxattr failed - while notifying source complete");
}
dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SINK_COMPLETE);
ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
if (ret < 0) {
@@ -568,6 +700,7 @@ pump_complete_migration (xlator_t *this)
call_resume (pump_priv->cleaner);
}
+ loc_wipe (&loc);
return 0;
}
@@ -594,6 +727,7 @@ pump_lookup_sink (loc_t *loc)
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"Lookup on sink child failed");
+ ret = -1;
goto out;
}
@@ -615,7 +749,6 @@ pump_task (void *data)
struct iatt iatt, parent;
dict_t *xattr_rsp = NULL;
dict_t *xattr_req = NULL;
- uuid_t gfid = {0};
int ret = -1;
@@ -624,7 +757,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -638,9 +771,8 @@ pump_task (void *data)
&iatt, &xattr_rsp, &parent);
gf_log (this->name, GF_LOG_TRACE,
- "lookup: ino=%"PRId64", path=%s",
- loc.ino,
- loc.path);
+ "lookup: path=%s gfid=%s",
+ loc.path, uuid_utoa (loc.inode->gfid));
ret = pump_check_and_update_status (this);
if (ret < 0) {
@@ -656,13 +788,14 @@ pump_task (void *data)
goto out;
}
- gf_pump_traverse_directory (&loc, gfid);
+ gf_pump_traverse_directory (&loc);
pump_complete_migration (this);
out:
if (xattr_req)
dict_unref (xattr_req);
+ loc_wipe (&loc);
return 0;
}
@@ -696,7 +829,7 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
priv = this->private;
pump_priv = priv->pump_private;
- pump_frame->root->lk_owner = (uint64_t) (unsigned long)pump_frame->root;
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
pump_pid = (uint64_t) (unsigned long)pump_frame->root;
ret = synctask_new (pump_priv->env, pump_task,
@@ -710,8 +843,8 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
}
gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %"PRIu64" %"PRIu64,
- pump_frame->root->lk_owner, pump_pid);
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
priv->use_afr_in_pump = 1;
out:
@@ -745,7 +878,7 @@ pump_cmd_start_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
call_frame_t *prev = NULL;
@@ -797,9 +930,9 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
- data = data_ref (dict_get (local->dict, PUMP_CMD_START));
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
if (!data) {
ret = -1;
gf_log (this->name, GF_LOG_ERROR,
@@ -838,7 +971,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
PUMP_SINK_CHILD(this)->fops->setxattr,
&loc,
dict,
- 0);
+ 0, NULL);
ret = 0;
@@ -852,6 +985,7 @@ out:
if (ret && clnt_cmd)
GF_FREE (clnt_cmd);
+ loc_wipe (&loc);
return ret;
}
@@ -871,7 +1005,7 @@ pump_cmd_start_getxattr_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_local_t *local = NULL;
char *path = NULL;
@@ -938,6 +1072,7 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
uint64_t number_files = 0;
char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
char *dict_str = NULL;
int32_t op_ret = 0;
@@ -966,16 +1101,19 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
}
if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
} else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
}
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
dict = dict_new ();
- ret = dict_set_dynstr (dict, PUMP_CMD_STATUS, dict_str);
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_set_dynstr returned negative value");
@@ -987,13 +1125,12 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
out:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
- if (dict_str)
- GF_FREE (dict_str);
+ GF_FREE (dict_str);
return 0;
}
@@ -1035,14 +1172,14 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
PUMP_SOURCE_CHILD(this),
PUMP_SOURCE_CHILD(this)->fops->getxattr,
&loc,
- PUMP_PATH);
+ PUMP_PATH, NULL);
ret = 0;
@@ -1052,6 +1189,7 @@ out:
pump_command_reply (frame, this);
}
+ loc_wipe (&loc);
return 0;
}
@@ -1059,7 +1197,7 @@ static int
pump_cleanup_helper (void *data) {
call_frame_t *frame = data;
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0);
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
return 0;
}
@@ -1085,14 +1223,6 @@ pump_execute_commit (call_frame_t *frame, xlator_t *this)
pump_priv = priv->pump_private;