summaryrefslogtreecommitdiffstats
path: root/tests/basic
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2017-07-06 16:40:07 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2017-07-13 08:10:11 +0000
commitf367671d451ae0fc3e178d26cae1880d59eb6ebd (patch)
treee50d5b7b2bbf654e41933c347012b7651101ecab /tests/basic
parent73b2b7fe57069eb85485465fb92c52a97d4d411c (diff)
cluster/ec: Get size of file in EC [f]xattrop
Problem:
For allowing parallel writes we shouldn't depend on ia_size to be the same for all the bricks in each write_cbk(). But we need to make sure the backend size is correct on all the bricks and that no crashes/manual modifications happened.

Fix:
At the time of get_size_version() we do one check to make sure the size of the file is the same across the bricks. From then on the FOPs will give the status of the fop, so we rely on this information to keep track of which bricks are good/bad.

Updates #251
Change-Id: I1df645347e2e9f2e09cfa4411b6cc305d7f4e4e5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://review.gluster.org/17741
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Diffstat (limited to 'tests/basic')
0 files changed, 0 insertions, 0 deletions
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c620
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h36
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c1482
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h43
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c1799
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h39
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2123
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h68
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1533
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h35
-rw-r--r--xlators/cluster/afr/src/afr-open.c663
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c1329
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h42
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2919
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h141
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1576
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c1522
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c690
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h37
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1787
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h65
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1403
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h50
-rw-r--r--xlators/cluster/afr/src/afr.c1025
-rw-r--r--xlators/cluster/afr/src/afr.h929
-rw-r--r--xlators/cluster/afr/src/pump.c1017
-rw-r--r--xlators/cluster/afr/src/pump.h39
-rw-r--r--xlators/cluster/dht/src/Makefile.am26
-rw-r--r--xlators/cluster/dht/src/dht-common.c4051
-rw-r--r--xlators/cluster/dht/src/dht-common.h678
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c520
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c96
-rw-r--r--xlators/cluster/dht/src/dht-helper.c780
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1139
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1013
-rw-r--r--xlators/cluster/dht/src/dht-layout.c249
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c253
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h25
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1815
-rw-r--r--xlators/cluster/dht/src/dht-rename.c397
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c624
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c536
-rw-r--r--xlators/cluster/dht/src/nufa.c436
-rw-r--r--xlators/cluster/dht/src/switch.c350
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am11
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h29
-rw-r--r--xlators/cluster/stripe/src/stripe.c3806
-rw-r--r--xlators/cluster/stripe/src/stripe.h177
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am9
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c1016
-rw-r--r--xlators/debug/error-gen/src/error-gen.h39
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c1390
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3083
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c69
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/access-control/src/Makefile.am13
-rw-r--r--xlators/features/access-control/src/access-control.c2060
-rw-r--r--xlators/features/access-control/src/access-control.h55
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/features/access-control/Makefile.am)0
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c358
-rw-r--r--xlators/features/locks/src/common.h98
-rw-r--r--xlators/features/locks/src/entrylk.c250
-rw-r--r--xlators/features/locks/src/inodelk.c447
-rw-r--r--xlators/features/locks/src/locks-mem-types.h21
-rw-r--r--xlators/features/locks/src/locks.h84
-rw-r--r--xlators/features/locks/src/posix.c1393
-rw-r--r--xlators/features/locks/src/reservelk.c51
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c52
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c41
-rw-r--r--xlators/features/marker/src/marker-common.h26
-rw-r--r--xlators/features/marker/src/marker-mem-types.h24
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c172
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h64
-rw-r--r--xlators/features/marker/src/marker-quota.c1921
-rw-r--r--xlators/features/marker/src/marker-quota.h129
-rw-r--r--xlators/features/marker/src/marker.c1598
-rw-r--r--xlators/features/marker/src/marker.h88
-rw-r--r--xlators/features/marker/utils/Makefile.am7
-rwxr-xr-xxlators/features/marker/utils/gsyncd.in7
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am5
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py140
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py14
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py315
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py335
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py162
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py491
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py11
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c29
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c720
-rw-r--r--xlators/features/quiesce/src/quiesce.h23
-rw-r--r--xlators/features/quota/src/Makefile.am7
-rw-r--r--xlators/features/quota/src/quota-mem-types.h23
-rw-r--r--xlators/features/quota/src/quota.c1897
-rw-r--r--xlators/features/quota/src/quota.h66
-rw-r--r--xlators/features/read-only/src/Makefile.am19
-rw-r--r--xlators/features/read-only/src/read-only-common.c239
-rw-r--r--xlators/features/read-only/src/read-only-common.h115
-rw-r--r--xlators/features/read-only/src/read-only.c243
-rw-r--r--xlators/features/read-only/src/worm.c89
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h23
-rw-r--r--xlators/features/trash/src/trash.c113
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c479
-rw-r--r--xlators/lib/src/libxlator.h104
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1953
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c4236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c4104
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1151
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c693
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c9025
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h170
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c169
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1093
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2024
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c214
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h61
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2977
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h152
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c7255
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h448
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c3121
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h161
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2225
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1313
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h724
-rw-r--r--xlators/mount/fuse/src/Makefile.am23
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c3389
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h337
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c489
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h23
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c918
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in444
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in23
-rw-r--r--xlators/nfs/lib/src/auth-null.c72
-rw-r--r--xlators/nfs/lib/src/auth-unix.c97
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.c554
-rw-r--r--xlators/nfs/lib/src/msg-nfs3.h186
-rw-r--r--xlators/nfs/lib/src/rpc-socket.c361
-rw-r--r--xlators/nfs/lib/src/rpc-socket.h65
-rw-r--r--xlators/nfs/lib/src/rpcsvc-auth.c400
-rw-r--r--xlators/nfs/lib/src/rpcsvc.c2923
-rw-r--r--xlators/nfs/lib/src/rpcsvc.h728
-rw-r--r--xlators/nfs/lib/src/xdr-common.h48
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.c1897
-rw-r--r--xlators/nfs/lib/src/xdr-nfs3.h1206
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.c229
-rw-r--r--xlators/nfs/lib/src/xdr-rpc.h82
-rw-r--r--xlators/nfs/server/src/Makefile.am25
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c1363
-rw-r--r--xlators/nfs/server/src/mount3.h46
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c232
-rw-r--r--xlators/nfs/server/src/nfs-common.h29
-rw-r--r--xlators/nfs/server/src/nfs-fops.c543
-rw-r--r--xlators/nfs/server/src/nfs-fops.h52
-rw-r--r--xlators/nfs/server/src/nfs-generics.c64
-rw-r--r--xlators/nfs/server/src/nfs-generics.h40
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c75
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h21
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h32
-rw-r--r--xlators/nfs/server/src/nfs.c1114
-rw-r--r--xlators/nfs/server/src/nfs.h56
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c185
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h57
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c2884
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h44
-rw-r--r--xlators/nfs/server/src/nfs3.c1656
-rw-r--r--xlators/nfs/server/src/nfs3.h134
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2080
-rw-r--r--xlators/performance/io-cache/src/io-cache.h48
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c46
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h20
-rw-r--r--xlators/performance/io-cache/src/page.c272
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c1502
-rw-r--r--xlators/performance/io-threads/src/io-threads.h36
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h21
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h22
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3529
-rw-r--r--xlators/performance/quick-read/src/quick-read.h61
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c66
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h20
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c585
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h23
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4114
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c56
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3817
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c52
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c22
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c25
-rw-r--r--xlators/protocol/client/src/client-handshake.c1141
-rw-r--r--xlators/protocol/client/src/client-helpers.c162
-rw-r--r--xlators/protocol/client/src/client-lk.c424
-rw-r--r--xlators/protocol/client/src/client-mem-types.h23
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6203
-rw-r--r--xlators/protocol/client/src/client.c1085
-rw-r--r--xlators/protocol/client/src/client.h154
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c5442
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am20
-rw-r--r--xlators/protocol/server/src/authenticate.c108
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c230
-rw-r--r--xlators/protocol/server/src/server-helpers.c1392
-rw-r--r--xlators/protocol/server/src/server-helpers.h68
-rw-r--r--xlators/protocol/server/src/server-mem-types.h20
-rw-r--r--xlators/protocol/server/src/server-resolve.c457
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c887
-rw-r--r--xlators/protocol/server/src/server.h143
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5197
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1391
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c3761
-rw-r--r--xlators/storage/posix/src/posix.h129
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c180
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h26
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2183
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h30
469 files changed, 165925 insertions, 111287 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 4c94f5e44..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 3310a2115..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -148,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -166,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -188,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -217,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index e192b599b..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,21 +1,31 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
+afr_la_LDFLAGS = -module -avoid-version
afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
+pump_la_LDFLAGS = -module -avoid-version
pump_la_SOURCES = $(afr_common_source) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c432cc49b..af01f2ef2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -44,6 +35,7 @@
#include "compat.h"
#include "byte-order.h"
#include "statedump.h"
+#include "inode.h"
#include "fd.h"
@@ -54,301 +46,827 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#define AFR_STATISTICS_HISTORY_SIZE 50
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict);
+/*
+ * Copy the first child_count entries of src into dst, one element at a
+ * time. Neither pointer is checked for NULL, and dst must have room for
+ * child_count entries because the loop writes dst[0..child_count-1].
+ */
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ dst[i] = src[i];
+}
+
+/*
+ * Add AFR's request keys to xattr_req:
+ *  - for every child index below priv->child_count, priv->pending_key[i]
+ *    is set to the value 3 * sizeof(int32_t);
+ *  - GF_GFIDLESS_LOOKUP is set to 1.
+ * Failures to set a key are only logged (WARNING for the pending keys,
+ * DEBUG for the gfidless-lookup key); the function returns nothing.
+ * path is used solely in those log messages.
+ */
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* the value is a byte count: three int32_t counters per key */
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ path, priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
+}
+
+/*
+ * Build local->xattr_req for a lookup.
+ *
+ * A new dict is created in local->xattr_req; if the caller supplied
+ * xattr_req its contents are copied in. afr_xattr_req_prepare() then adds
+ * the pending keys, and GLUSTERFS_INODELK_COUNT, GLUSTERFS_ENTRYLK_COUNT
+ * and GLUSTERFS_PARENT_ENTRYLK are each set to 0 (a failure to set any of
+ * them is logged as a WARNING and otherwise ignored).
+ *
+ * *gfid_req receives the pointer stored under "gfid-req" in
+ * local->xattr_req, or NULL when that key is absent. When the key is
+ * present and loc->parent is non-NULL the key is removed from
+ * local->xattr_req.
+ *
+ * Returns 0 on success and -ENOMEM when the dict cannot be allocated.
+ * gfid_req itself must not be NULL (asserted).
+ */
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc, void **gfid_req)
+{
+ int ret = -ENOMEM;
+
+ GF_ASSERT (gfid_req);
+
+ *gfid_req = NULL;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
+
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to get the gfid from dict", loc->path);
+ *gfid_req = NULL;
+ } else {
+ /* NOTE(review): *gfid_req keeps pointing at the value that was
+ * stored under the key being deleted here; presumably the
+ * caller's original xattr_req still holds a reference to it -
+ * confirm against dict_copy/dict_del semantics. */
+ if (loc->parent != NULL)
+ dict_del (local->xattr_req, "gfid-req");
+ }
+ /* the key-setting failures above are non-fatal: report success */
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Copy a gfid into dst, taking the first non-null candidate in this
+ * order: loc->inode->gfid (when loc->inode is set), loc->gfid, then the
+ * caller-supplied buffer `new`. If every candidate is null (or `new` is a
+ * NULL pointer), dst is left untouched.
+ */
+void
+afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+{
+ inode_t *inode = NULL;
+
+ inode = loc->inode;
+ if (inode && !uuid_is_null (inode->gfid))
+ uuid_copy (dst, inode->gfid);
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (dst, loc->gfid);
+ else if (new && !uuid_is_null (new))
+ uuid_copy (dst, new);
+}
+
+/*
+ * Count how many children recorded op_errno in child_errno[].
+ *
+ * When children is non-NULL it is treated as a list of child indices that
+ * ends at the first -1 (or after child_count entries); only those children
+ * are examined. When children is NULL every index 0..child_count-1 is
+ * examined.
+ *
+ * Returns the number of examined children whose child_errno entry equals
+ * op_errno.
+ */
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno)
+{
+ int i = 0;
+ int errno_count = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ /* -1 terminates the list of children */
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (child_errno[child] == op_errno)
+ errno_count++;
+ }
+ return errno_count;
+}
+/*
+ * Store a copy of gfid in dict under the key "gfid-req".
+ *
+ * The gfid is copied into a freshly allocated uuid_t which is handed to
+ * dict_set_dynptr(). Returns -1 when the allocation fails, otherwise the
+ * result of dict_set_dynptr(); on any non-zero result the copy is freed
+ * here and a failure of the dict call is logged at ERROR level.
+ */
int32_t
afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
{
- int ret = 0;
+ int ret = 0;
+ uuid_t *pgfid = NULL;
GF_ASSERT (gfid);
- ret = dict_set_static_bin (dict, "gfid-req", gfid, 16);
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
if (ret)
- gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed");
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ /* the dict did not accept the copy, so release it here */
+ if (ret && pgfid)
+ GF_FREE (pgfid);
return ret;
}
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
{
- int ret = 0;
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
+
+/*
+ * Return the afr_inode_ctx_t stored in inode's context for this xlator,
+ * creating and storing one if none is present yet.
+ *
+ * A new context is zero-allocated together with a fresh_children array of
+ * priv->child_count entries and saved with __inode_ctx_put(). Returns NULL
+ * when either allocation or the put fails (the partially built context is
+ * destroyed first).
+ *
+ * The callers visible in this file invoke it between LOCK/UNLOCK of
+ * inode->lock.
+ */
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ /* a context already exists: hand it back unchanged */
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
+ }
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
+out:
+ return ctx;
- VALIDATE_OR_GOTO (inode, out);
+fail:
+ /* afr_inode_ctx_destroy() accepts a NULL ctx */
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx);
+ ctx = __afr_inode_ctx_get (inode, this);
+ }
+ UNLOCK (&inode->lock);
+ return ctx;
+}
- if (ret < 0)
- goto unlock;
+void
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
+ read_child = (int32_t)(ctx->masks &
+ AFR_ICTX_READ_CHILD_MASK);
+ params->u.read_ctx.read_child = read_child;
+ if (!fresh_children)
+ goto unlock;
+ for (i = 0; i < priv->child_count; i++)
+ fresh_children[i] = ctx->fresh_children[i];
+ break;
+ case AFR_INODE_GET_OPENDIR_DONE:
+ params->u.value = _gf_false;
+ if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
+ params->u.value = _gf_true;
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
}
unlock:
UNLOCK (&inode->lock);
+}
+
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
out:
- return split_brain;
+ return spb;
}
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.value;
+}
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
- uint64_t ctx = 0;
- int ret = 0;
+ afr_inode_params_t params = {0};
- VALIDATE_OR_GOTO (inode, out);
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.read_ctx.read_child;
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+void
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
- if (ret < 0) {
- ctx = 0;
- }
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
+ ctx->masks = remaining_mask | mask;
+}
- if (set) {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- } else {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
- }
- __inode_ctx_put (inode, this, ctx);
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children)
+ ctx->fresh_children[i] = fresh_children[i];
+ else
+ ctx->fresh_children[i] = -1;
}
- UNLOCK (&inode->lock);
-out:
- return;
}
+void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
+ int32_t child_count)
+{
+ int i = 0;
+ int32_t read_child = -1;
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+ GF_ASSERT (stale_children);
+ for (i = 0; i < child_count; i++) {
+ if (stale_children[i] == -1)
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+ read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
+ if (!afr_is_child_present (ctx->fresh_children, child_count,
+ read_child))
+ afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+}
+
+void
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
{
- int ret = 0;
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = remaining_mask | mask;
+}
- uint64_t ctx = 0;
- uint64_t opendir_done = 0;
+void
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
- VALIDATE_OR_GOTO (inode, out);
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
+ priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
goto unlock;
-
- opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
+ read_child = params->u.read_ctx.read_child;
+ fresh_children = params->u.read_ctx.children;
+ afr_inode_ctx_set_read_ctx (ctx, read_child,
+ fresh_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_RM_STALE_CHILDREN:
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
+ afr_inode_ctx_set_opendir_done (ctx);
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
}
unlock:
UNLOCK (&inode->lock);
-
-out:
- return opendir_done;
}
+/*
+ * Record the metadata and data split-brain state of an inode.
+ *
+ * Each of mdata_spb / data_spb is written to the inode's AFR context
+ * unless it is DONT_KNOW, in which case the stored value is kept.
+ *
+ * afr_inode_ctx_get() returns NULL when the context can neither be found
+ * nor created; in that case nothing is recorded and a warning is logged
+ * instead of dereferencing the NULL context.
+ */
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx) {
+ gf_log (this->name, GF_LOG_WARNING, "unable to get the inode "
+ "ctx, split-brain state not recorded");
+ return;
+ }
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
+}
void
afr_set_opendir_done (xlator_t *this, inode_t *inode)
{
- uint64_t ctx = 0;
- int ret = 0;
+ afr_inode_params_t params = {0};
- VALIDATE_OR_GOTO (inode, out);
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+/*
+ * Store read_child and the fresh_children list in the inode's AFR context
+ * by issuing an AFR_INODE_SET_READ_CTX request through
+ * afr_inode_set_ctx_params().
+ *
+ * Asserted preconditions: read_child >= 0, fresh_children is non-NULL and
+ * read_child is one of the entries of fresh_children.
+ */
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+/*
+ * Ask afr_inode_set_ctx_params() to drop the given stale children from the
+ * inode's AFR context (AFR_INODE_RM_STALE_CHILDREN). The list is passed
+ * through the read_ctx.children member of the params union.
+ * stale_children must be non-NULL (asserted).
+ */
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+
+ GF_ASSERT (stale_children);
+
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
+ params.u.read_ctx.children = stale_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+/*
+ * Tell whether `child` is marked as a source.
+ *
+ * Returns _gf_true only when child lies in 0..child_count-1 and
+ * sources[child] is non-zero; any other input yields _gf_false. A child
+ * index >= child_count additionally trips the assertion.
+ */
+gf_boolean_t
+afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+{
+ gf_boolean_t source_xattrs = _gf_false;
+
+ GF_ASSERT (child < child_count);
+
+ /* range check again so sources[] is never indexed out of bounds */
+ if ((child >= 0) && (child < child_count) &&
+ sources[child]) {
+ source_xattrs = _gf_true;
+ }
+ return source_xattrs;
+}
+
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ int i = 0;
+
+ GF_ASSERT (child < child_count);
- if (ret < 0) {
- ctx = 0;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (child == success_children[i]) {
+ success_child = _gf_true;
+ break;
}
+ }
+ return success_child;
+}
- ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+gf_boolean_t
+afr_is_read_child (int32_t *success_children, int32_t *sources,
+ int32_t child_count, int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ gf_boolean_t source = _gf_false;
- __inode_ctx_put (inode, this, ctx);
+ if (child < 0) {
+ return _gf_false;
}
- UNLOCK (&inode->lock);
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (child_count > 0);
+
+ success_child = afr_is_child_present (success_children, child_count,
+ child);
+ if (!success_child)
+ goto out;
+ if (NULL == sources) {
+ source = _gf_true;
+ goto out;
+ }
+ source = afr_is_source_child (sources, child_count, child);
out:
- return;
+ return (success_child && source);
}
+/*
+ * Pick a child index by hashing.
+ *
+ * Returns -1 when hmode is 0. Otherwise a 16-byte buffer is hashed with
+ * SuperFastHash and reduced modulo child_count. The buffer starts as a
+ * copy of gfid (all zeroes when gfid is NULL); when hmode > 1 its leading
+ * sizeof(pid_t) bytes are overwritten with the caller's pid.
+ *
+ * success_children is not consulted here, so the returned index is only
+ * guaranteed to be below child_count, not to be a successful child.
+ * NOTE(review): child_count == 0 would divide by zero - presumably callers
+ * never pass it; confirm.
+ */
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients will converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
+/* If sources is NULL the xattrs are assumed to be of source for all
+ * success_children.
+ */
+int
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
{
- int ret = 0;
+ int32_t read_child = -1;
+ int i = 0;
- uint64_t ctx = 0;
- uint64_t read_child = 0;
+ GF_ASSERT (success_children);
- VALIDATE_OR_GOTO (inode, out);
+ read_child = config_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ read_child = prev_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
- if (ret < 0)
- goto unlock;
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
- read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
+ for (i = 0; i < child_count; i++) {
+ read_child = success_children[i];
+ if (read_child < 0)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
}
-unlock:
- UNLOCK (&inode->lock);
+ read_child = -1;
out:
return read_child;
}
-
+/* This function should be used when all the success_children are sources
+ */
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int read_child = -1;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ priv = this->private;
+ read_child = afr_select_read_child_from_policy (fresh_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ NULL,
+ priv->hash_mode, gfid);
+ if (read_child >= 0)
+ afr_inode_set_read_ctx (this, inode, read_child,
+ fresh_children);
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+/* afr_next_call_child ()
+ * This is a common function used by all the read-type fops
+ * This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ *
+ * Scanning starts at index *last_index + 1 of fresh_children. Entries equal
+ * to read_child, and entries whose child is not up in child_up[], are
+ * skipped. The scan stops at child_count or at the first -1 entry.
+ *
+ * Returns the next usable child and stores its index in *last_index, or
+ * returns -1 (leaving *last_index unchanged) when no further child exists.
+ */
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child)
+{
+ int next_index = 0;
+ int32_t next_call_child = -1;
+
+ GF_ASSERT (last_index);
+
+ next_index = *last_index;
+retry:
+ next_index++;
+ /* ran off the end of the array or hit the -1 terminator */
+ if ((next_index >= child_count) ||
+ (fresh_children[next_index] == -1))
+ goto out;
+ /* skip the preferred read child and any child that is down */
+ if ((fresh_children[next_index] == read_child) ||
+ (!child_up[fresh_children[next_index]]))
+ goto retry;
+ *last_index = next_index;
+ next_call_child = fresh_children[next_index];
+out:
+ return next_call_child;
+}
- if (ret < 0) {
- ctx = 0;
+ /* This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ *
+ * Chooses the child a read-type fop should be wound to:
+ *  - read_child itself when it is up (*last_index stays -1);
+ *  - otherwise the first entry of fresh_children (a list ending at -1)
+ *    that is up, with *last_index set to its position in the list.
+ *
+ * Returns 0 on success, -EIO when read_child is negative and -ENOTCONN
+ * when no listed child is up. *call_child and *last_index are always
+ * initialised to -1 first, so they hold defined values on every failure
+ * path (including the debug log at the end).
+ */
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+ GF_ASSERT (call_child);
+ GF_ASSERT (last_index);
+ GF_ASSERT (fresh_children);
+
+ priv = this->private;
+ /* set the outputs before any early exit: "out" reads both of them */
+ *call_child = -1;
+ *last_index = -1;
+
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (child_up[read_child]) {
+ *call_child = read_child;
+ } else {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ if (child_up[fresh_children[i]]) {
+ *call_child = fresh_children[i];
+ ret = 0;
+ break;
+ }
+ }
- ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
- | (AFR_ICTX_READ_CHILD_MASK & read_child);
+ if (*call_child == -1) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- __inode_ctx_put (inode, this, ctx);
+ *last_index = i;
}
- UNLOCK (&inode->lock);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
+ "last_index: %d", ret, *call_child, *last_index);
+ return ret;
+}
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+{
+ unsigned int i = 0;
+
+ if (!xattr)
+ goto out;
+ for (i = 0; i < child_count; i++) {
+ if (xattr[i]) {
+ dict_unref (xattr[i]);
+ xattr[i] = NULL;
+ }
+ }
out:
return;
}
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
- */
+/*
+ * Release an array of xattr dicts: afr_reset_xattr() unrefs and clears the
+ * first child_count entries, then the array itself is freed.
+ */
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
-
sh = &local->self_heal;
priv = this->private;
- if (sh->buf)
- GF_FREE (sh->buf);
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- GF_FREE (sh->xattr);
- }
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ GF_FREE (sh->buf);
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
+ GF_FREE (sh->parentbufs);
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
+ if (sh->inode)
+ inode_unref (sh->inode);
+
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
- if (sh->sources)
- GF_FREE (sh->sources);
+ GF_FREE (sh->child_errno);
- if (sh->success)
- GF_FREE (sh->success);
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ GF_FREE (sh->sources);
- if (sh->healing_fd && !sh->healing_fd_opened) {
+ GF_FREE (sh->success);
+
+ GF_FREE (sh->locked_nodes);
+
+ if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ GF_FREE ((char *)sh->linkname);
+
+ GF_FREE (sh->success_children);
+
+ GF_FREE (sh->fresh_children);
+
+ GF_FREE (sh->fresh_parent_dirs);
loc_wipe (&sh->parent_loc);
+ loc_wipe (&sh->lookup_loc);
+
+ GF_FREE (sh->checksum);
+
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
}
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
-
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
+ GF_FREE (local->transaction.pre_op);
+ GF_FREE (local->transaction.eager_lock);
GF_FREE (local->transaction.basename);
GF_FREE (local->transaction.new_basename);
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
}
void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
{
- int i;
afr_private_t * priv = NULL;
if (!local)
@@ -369,16 +887,21 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->xattr_req)
dict_unref (local->xattr_req);
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
+
GF_FREE (local->child_up);
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->fresh_children);
+
{ /* lookup */
if (local->cont.lookup.xattrs) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lookup.xattrs[i]) {
- dict_unref (local->cont.lookup.xattrs[i]);
- local->cont.lookup.xattrs[i] = NULL;
- }
- }
+ afr_reset_xattr (local->cont.lookup.xattrs,
+ priv->child_count);
GF_FREE (local->cont.lookup.xattrs);
local->cont.lookup.xattrs = NULL;
}
@@ -390,16 +913,24 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->cont.lookup.inode) {
inode_unref (local->cont.lookup.inode);
}
+
+ GF_FREE (local->cont.lookup.postparents);
+
+ GF_FREE (local->cont.lookup.bufs);
+
+ GF_FREE (local->cont.lookup.success_children);
+
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
}
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -433,18 +964,40 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
dict_unref (local->cont.setxattr.dict);
}
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
{ /* removexattr */
GF_FREE (local->cont.removexattr.name);
}
-
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
{ /* symlink */
GF_FREE (local->cont.symlink.linkpath);
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
}
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
+ }
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
}
@@ -465,107 +1018,227 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-
-/**
- * up_children_count - return the number of children that are up
- */
-
int
-afr_up_children_count (int child_count, unsigned char *child_up)
+afr_set_elem_count_get (unsigned char *elems, int child_count)
{
int i = 0;
int ret = 0;
for (i = 0; i < child_count; i++)
- if (child_up[i])
+ if (elems[i])
ret++;
return ret;
}
+/**
+ * up_children_count - return the number of children that are up
+ */
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
{
- ino64_t scaled_ino = -1;
-
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
-
- scaled_ino = (ino * child_count) + child_index;
+ return afr_set_elem_count_get (child_up, child_count);
+}
-out:
- return scaled_ino;
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
}
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
-int
-afr_deitransform_orig (ino64_t ino, int child_count)
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
{
- int index = -1;
+ uint64_t ctx = 0;
+ int32_t ret = 0;
- index = ino % child_count;
+ GF_ASSERT (loc);
+ GF_ASSERT (this);
+ GF_ASSERT (loc->inode);
- return index;
+ ret = inode_ctx_get (loc->inode, this, &ctx);
+ if (0 == ret)
+ return _gf_false;
+ return _gf_true;
}
-
-int
-afr_deitransform (ino64_t ino, int child_count)
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
{
- return 0;
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
}
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
{
- afr_local_t *local = NULL;
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
- local = frame->local;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
}
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
- return 0;
+ data_unref (max_data);
+ }
}
-
-static void
-afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
+int
+afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
+ dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int ret = 0;
+ int i = 0;
- int ret = 0;
+ GF_ASSERT (local);
- if (afr_sh_has_metadata_pending (xattr, child_index, this)) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ buf = &local->cont.lookup.buf;
+ postparent = &local->cont.lookup.postparent;
+ xattr = &local->cont.lookup.xattr;
+ priv = this->private;
- if (afr_sh_has_entry_pending (xattr, child_index, this)) {
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
+ read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
}
- if (afr_sh_has_data_pending (xattr, child_index, this)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
+ read_child);
+ if (!*xattr)
+ *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
+ *buf = local->cont.lookup.bufs[read_child];
+ *postparent = local->cont.lookup.postparents[read_child];
+
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
+ if (IA_INVAL == local->cont.lookup.inode->ia_type) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->ia_type = buf->ia_type;
}
+out:
+ return ret;
+}
+
+static void
+afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
+ int child_index, dict_t *xattr)
+{
+ uint32_t inodelk_count = 0;
+ uint32_t entrylk_count = 0;
+ int ret = -1;
+ uint32_t parent_entrylk = 0;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+ GF_ASSERT (child_index >= 0);
ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
&inodelk_count);
@@ -576,484 +1249,1248 @@ afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
&entrylk_count);
if (ret == 0)
local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
}
-
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
static void
-afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local,
- struct iatt *buf, struct iatt *lookup_buf)
+afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
{
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- */
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
- gf_log (this->name, GF_LOG_NORMAL,
- "filetype differs for %s ", local->loc.path);
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
- local->govinda_gOvinda = 1;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
+
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
}
+}
+
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
+static void
+afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
+ struct iatt *buf, struct iatt *lookup_buf)
+{
if (PERMISSION_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_DEBUG,
"permissions differ for %s ", local->loc.path);
- local->self_heal.need_metadata_self_heal = _gf_true;
+ local->self_heal.do_metadata_self_heal = _gf_true;
}
if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_NORMAL,
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
"ownership differs for %s ", local->loc.path);
}
if (SIZE_DIFFERS (buf, lookup_buf)
&& IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_DEBUG,
"size differs for %s ", local->loc.path);
- local->self_heal.need_data_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
}
+ if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
+ /* mismatching gfid */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: gfid different on subvolume", local->loc.path);
+ }
}
-
static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf)
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
{
- int unwind = 1;
- int source = -1;
- int up_count = 0;
- char sh_type_str[256] = {0,};
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
+ sh = &local->self_heal;
- priv = this->private;
- local = frame->local;
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
- local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino;
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
+{
+ GF_ASSERT (local);
+ GF_ASSERT (this);
- if (local->cont.lookup.ino) {
- local->cont.lookup.buf.ia_ino = local->cont.lookup.ino;
+ if ((local->success_count > 0) && (local->enoent_count > 0)) {
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
+ local->self_heal.do_entry_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entries are missing in lookup of %s.",
+ local->loc.path);
}
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino) {
- local->cont.lookup.buf.ia_ino =
- local->cont.lookup.inode->ino;
- }
- }
- up_count = afr_up_children_count (priv->child_count, priv->child_up);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
+ return;
+}
- goto unwind;
- }
+gf_boolean_t
+afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (priv);
- if (local->success_count && local->enoent_count) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_NORMAL,
- "entries are missing in lookup of %s.",
- local->loc.path);
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ return (sh->do_gfid_self_heal
+ || sh->do_missing_entry_self_heal
+ || (afr_data_self_heal_enabled (priv->data_self_heal) &&
+ sh->do_data_self_heal)
+ || (priv->metadata_self_heal && sh->do_metadata_self_heal)
+ || (priv->entry_self_heal && sh->do_entry_self_heal));
+}
+
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type)
+{
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ if (IA_ISDIR (ia_type)) {
+ type = AFR_ENTRY_TRANSACTION;
+ } else if (IA_ISREG (ia_type)) {
+ type = AFR_DATA_TRANSACTION;
}
+ return type;
+}
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this,
- local->cont.lookup.inode)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_NORMAL,
- "split brain detected during lookup of "
- "%s.", local->loc.path);
- }
+int
+afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
+ int32_t *read_child)
+{
+ ia_type_t ia_type = IA_INVAL;
+ int32_t source = -1;
+ int ret = -1;
+ dict_t **xattrs = NULL;
+ int32_t *success_children = NULL;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (local->success_count > 0);
+
+ success_children = local->cont.lookup.success_children;
+ /*We can take the success_children[0] only because we already
+ *handle the conflicting children other wise, we could select the
+ *read_child based on wrong file type
+ */
+ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
+ type = afr_transaction_type_get (ia_type);
+ xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
+ source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
+ type, *gfid);
+ if (source < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
+ "for %s", local->loc.path);
+ goto out;
}
- if ((local->self_heal.need_metadata_self_heal
- || local->self_heal.need_data_self_heal
- || local->self_heal.need_entry_self_heal)
- && ((!local->cont.lookup.is_revalidate)
- || (local->op_ret != -1))) {
+ gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
+ source, local->loc.path);
+ *read_child = source;
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline gf_boolean_t
+afr_is_transaction_running (afr_local_t *local)
+{
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
+ return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
+}
+
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
+{
+ afr_local_t *local = NULL;
+ char sh_type_str[256] = {0,};
+ char *bg = "";
- if (local->inodelk_count || local->entrylk_count) {
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
- /* Someone else is doing self-heal on this file.
- So just make a best effort to set the read-subvolume
- and return */
+ local = frame->local;
+ local->self_heal.background = background;
+ local->self_heal.type = ia_type;
+ local->self_heal.unwind = unwind;
+ local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+
+ afr_self_heal_type_str_get (&local->self_heal,
+ sh_type_str,
+ sizeof (sh_type_str));
+
+ if (background)
+ bg = "background";
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s %s self-heal triggered. path: %s, reason: %s", bg,
+ sh_type_str, local->loc.path, reason);
+
+ afr_self_heal (frame, this, inode);
+}
- if (IA_ISREG (local->cont.lookup.inode->ia_type)) {
- source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs);
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path)
+{
+ unsigned int gfid_miss_count = 0;
+ int i = 0;
+ struct iatt *child1 = NULL;
- if (source >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- source);
- }
- }
- } else {
- if (!local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type =
- lookup_buf->ia_type;
- }
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if (uuid_is_null (child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
+ " on subvolume %d", path, success_children[i]);
+ gfid_miss_count++;
+ }
+ }
- local->self_heal.background = _gf_true;
- local->self_heal.type = local->cont.lookup.buf.ia_type;
- local->self_heal.unwind = afr_self_heal_lookup_unwind;
+ return gfid_miss_count;
+}
- unwind = 0;
+static int
+afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+{
+ int32_t *success_children = NULL;
+ afr_private_t *priv = NULL;
+ struct iatt *bufs = NULL;
+ int miss_count = 0;
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
- gf_log (this->name, GF_LOG_NORMAL, "background %s "
- "self-heal triggered. path: %s",
- sh_type_str, local->loc.path);
+ miss_count = afr_gfid_missing_count (this->name, success_children,
+ bufs, priv->child_count,
+ local->loc.path);
+ return miss_count;
+}
+
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name)
+{
+ gf_boolean_t conflicting = _gf_false;
+ int i = 0;
+ struct iatt *child1 = NULL;
+ struct iatt *child2 = NULL;
+ uuid_t *gfid = NULL;
- afr_self_heal (frame, this);
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
+ gfid = &child1->ia_gfid;
+
+ if (i == 0)
+ continue;
+
+ child2 = &bufs[success_children[i-1]];
+ if (FILETYPE_DIFFERS (child1, child2)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
+ "differs on subvolumes (%d, %d)", path,
+ success_children[i-1], success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ if (!gfid || uuid_is_null (child1->ia_gfid))
+ continue;
+ if (uuid_compare (*gfid, child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
+ " on subvolume %d", path, success_children[i]);
+ conflicting = _gf_true;
+ goto out;
}
}
+out:
+ return conflicting;
+}
+
+/* afr_update_gfid_from_iatts: This function should be called only if the
+ * iatts are not conflicting.
+ */
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children, unsigned int child_count)
+{
+ uuid_t *gfid = NULL;
+ int i = 0;
+ int child = 0;
-unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
+ gfid = &bufs[child].ia_gfid;
+ } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
+ if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
}
+ if (gfid && (!uuid_is_null (*gfid)))
+ uuid_copy (uuid, *gfid);
+out:
+ return;
}
+static gf_boolean_t
+afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t conflict = _gf_false;
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
+ priv = this->private;
+ conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name);
+ return conflict;
+}
-static gf_boolean_t
-__error_more_important (int32_t old_errno, int32_t new_errno)
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal)
{
- gf_boolean_t ret = _gf_true;
+ return !strcmp (data_self_heal, "open");
+}
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal)
+{
+ gf_boolean_t enabled = _gf_false;
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
+ if (gf_string2boolean (data_self_heal, &enabled) == -1) {
+ enabled = !strcmp (data_self_heal, "open");
+ GF_ASSERT (enabled);
+ }
- return ret;
+ return enabled;
}
+static void
+afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ struct iatt *bufs = NULL;
+ dict_t **xattr = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child1 = -1;
+ int32_t child2 = -1;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ sh = &local->self_heal;
+
+ afr_detect_self_heal_by_lookup_status (local, this);
+
+ if (afr_lookup_gfid_missing_count (local, this))
+ local->self_heal.do_gfid_self_heal = _gf_true;
+
+ if (_gf_true == afr_lookup_conflicting_entries (local, this))
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ else
+ afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
+ local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count);
+
+ bufs = local->cont.lookup.bufs;
+ for (i = 1; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i-1];
+ child2 = local->cont.lookup.success_children[i];
+ afr_detect_self_heal_by_iatt (local, this,
+ &bufs[child1], &bufs[child2]);
+ }
+
+ xattr = local->cont.lookup.xattrs;
+ for (i = 0; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i];
+ afr_lookup_set_self_heal_params_by_xattr (local, this,
+ xattr[child1]);
+ }
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
+}
int
-afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
+ local = frame->local;
- child_index = (long) cookie;
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
+
+ goto out;
+ } else {
+ local->op_ret = 0;
+ }
+
+ afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
+out:
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode, &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+//TODO: At the moment only lookup needs this, so not doing any checks, in the
+// future we will have to do fop specific operations
+void
+afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ struct iatt *lookup_bufs = NULL;
+ struct iatt *lookup_parentbufs = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+ local = sh->orig_frame->local;
+ lookup_bufs = local->cont.lookup.bufs;
+ lookup_parentbufs = local->cont.lookup.postparents;
priv = this->private;
- LOCK (&frame->lock);
- {
- local = frame->local;
+ memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
+ memcpy (lookup_parentbufs, sh->parentbufs,
+ priv->child_count * sizeof (*sh->parentbufs));
- lookup_buf = &local->cont.lookup.buf;
+ afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ local->cont.lookup.xattr = NULL;
+ }
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i])
+ local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
+ }
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ afr_reset_children (local->cont.lookup.success_children,
+ priv->child_count);
+ afr_children_copy (local->cont.lookup.success_children,
+ sh->fresh_children, priv->child_count);
+}
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+static void
+afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t *sh_launched)
+{
+ unsigned int up_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *reason = NULL;
- goto unlock;
- }
+ GF_ASSERT (sh_launched);
+ *sh_launched = _gf_false;
+ priv = this->private;
+ local = frame->local;
- afr_lookup_collect_xattr (local, this, child_index, xattr);
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
+ if (up_count == 1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Only 1 child up - do not attempt to detect self heal");
+ goto out;
+ }
- first_up_child = afr_first_up_child (priv);
+ afr_lookup_set_self_heal_params (local, this);
+ if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
+ goto out;
+
+ reason = "lookup detected pending operations";
+ afr_launch_self_heal (frame, this, local->cont.lookup.inode,
+ _gf_true, local->cont.lookup.buf.ia_type,
+ reason, afr_post_gfid_sh_success,
+ afr_self_heal_lookup_unwind);
+ *sh_launched = _gf_true;
+ }
+out:
+ return;
+}
+
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *fresh_children, unsigned int child_count)
+{
+ unsigned int i = 0;
+ unsigned int j = 0;
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (sources);
+ GF_ASSERT (fresh_children);
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
+ afr_reset_children (fresh_children, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ success_children[i])) {
+ fresh_children[j] = success_children[i];
+ j++;
}
+ }
+}
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
+static int
+afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+{
+ afr_private_t *priv = NULL;
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+ GF_ASSERT (read_child >= 0);
- if (priv->first_lookup && inode->ino == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
- }
+ priv = this->private;
+ afr_get_fresh_children (local->cont.lookup.success_children,
+ local->cont.lookup.sources,
+ local->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
+ local->fresh_children);
- *lookup_buf = *buf;
+ return 0;
+}
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict)
+{
+ int32_t read_child = -1;
+ int32_t ret = -1;
+ afr_local_t *local = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
+ local = frame->local;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
- if (child_index == local->read_child_index) {
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
+ if (local->loc.parent == NULL)
+ fail_conflict = _gf_true;
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+ if (afr_lookup_conflicting_entries (local, this)) {
+ if (fail_conflict == _gf_false)
+ ret = 0;
+ goto out;
+ }
- *lookup_buf = *buf;
- }
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
+ goto out;
- }
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
- local->success_count++;
+ ret = 0;
+out:
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
}
-unlock:
- UNLOCK (&frame->lock);
+ return ret;
+}
- call_count = afr_frame_return (frame);
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
+ }
+ return lsubvol;
+}
+
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
}
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
- return 0;
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
+out:
+ return;
}
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat an ongoing
+ * create/delete as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol. At this stage the second
+ subvol already has /a/b created by Client B, but Client C does not
+ check for that in the middle of its mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). The other subvol gets GFID-2 from Client C because, while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of performing mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between the lock() and pre-op()/op() phases of
+ the transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
-int
-afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
+ int unwind = 1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t sh_launched = _gf_false;
+ gf_boolean_t fail_conflict = _gf_false;
+ int gfid_miss_count = 0;
+ int enotconn_count = 0;
+ int up_children_count = 0;
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
+ priv = this->private;
+ local = frame->local;
- child_index = (long) cookie;
- priv = this->private;
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (local->op_ret < 0)
+ goto unwind;
- lookup_buf = &local->cont.lookup.buf;
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
+ gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ enotconn_count = priv->child_count - up_children_count;
+ if ((gfid_miss_count == local->success_count) &&
+ (enotconn_count > 0)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
+ "LOOKUP on a file without gfid is not allowed when "
+ "some of the children are down", local->loc.path);
+ goto unwind;
+ }
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
+ if ((gfid_miss_count == local->success_count) &&
+ uuid_is_null (local->cont.lookup.gfid_req)) {
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
+ local->loc.path);
+ goto unwind;
+ }
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
+ fail_conflict = _gf_true;
+ ret = afr_lookup_done_success_action (frame, this, fail_conflict);
+ if (ret)
+ goto unwind;
+ uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+ afr_lookup_perform_self_heal (frame, this, &sh_launched);
+ if (sh_launched) {
+ unwind = 0;
+ goto unwind;
+ }
- goto unlock;
- }
+ unwind:
+ if (unwind) {
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+ }
+}
- afr_lookup_collect_xattr (local, this, child_index, xattr);
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
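+ * The hierarchy is ESTALE > EIO > ENOENT > others. EIO gets its rank only
+ * when the caller passes eio as true; otherwise it is treated like any
+ * other errno. When no rule matches, new_errno is returned.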
+ */
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
+{
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
+}
- first_up_child = afr_first_up_child (priv);
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+ int32_t op_errno = 0;
+ int child = 0;
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
}
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
+ }
+ return op_errno;
+}
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
+static void
+afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+{
+ GF_ASSERT (local);
+ if (op_errno == ENOENT)
+ local->enoent_count++;
- /* inode number should be preserved across revalidates */
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+}
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+static void
+afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
+ inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+ GF_ASSERT (inode);
- *lookup_buf = *buf;
+ if (!__is_root_gfid (inode->gfid))
+ goto out;
+ if (!afr_is_fresh_lookup (&local->loc, this))
+ goto out;
+ priv = this->private;
+ if ((priv->first_lookup)) {
+ gf_log (this->name, GF_LOG_INFO, "added root inode");
+ priv->root_inode = inode_ref (inode);
+ priv->first_lookup = 0;
+ }
+out:
+ return;
+}
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
+static void
+afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
+ struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (child_index >= 0);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparents[child_index] = *postparent;
+ local->cont.lookup.bufs[child_index] = *buf;
+}
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
+static void
+afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
+ inode_t *inode, struct iatt *buf)
+{
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.buf = *buf;
+ afr_set_root_inode_on_first_lookup (local, this, inode);
+}
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
- if (child_index == local->read_child_index) {
+ if (op_ret != 0) {
+ goto out;
+ }
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
- *lookup_buf = *buf;
- }
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
+static void
+afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_private_t *priv = this->private;
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE) {
+ local->op_ret = op_ret;
+ local->op_errno = 0;
}
+ afr_lookup_handle_first_success (local, this, inode, buf);
+ }
+ afr_lookup_update_lk_counts (local, this,
+ child_index, xattr);
- local->success_count++;
+ afr_lookup_cache_args (local, child_index, xattr,
+ buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
}
+
+ local->cont.lookup.success_children[local->success_count] = child_index;
+ local->success_count++;
+}
+
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == -1) {
+ afr_lookup_handle_error (local, op_ret, op_errno);
+ goto unlock;
+ }
+ afr_lookup_handle_success (local, this, child_index, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+
+ }
unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
+ afr_lookup_done (frame, this);
}
- return 0;
+ return 0;
}
-
int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
+ int ret = -ENOMEM;
+ struct iatt *iatts = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
+
+ GF_ASSERT (local);
+ local->cont.lookup.xattrs = GF_CALLOC (child_count,
+ sizeof (*local->cont.lookup.xattr),
+ gf_afr_mt_dict_t);
+ if (NULL == local->cont.lookup.xattrs)
+ goto out;
- fop_lookup_cbk_t callback;
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.postparents = iatts;
- int call_count = 0;
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.bufs = iatts;
- uint64_t ctx;
+ success_children = afr_children_create (child_count);
+ if (NULL == success_children)
+ goto out;
+ local->cont.lookup.success_children = success_children;
+
+ local->fresh_children = afr_children_create (child_count);
+ if (NULL == local->fresh_children)
+ goto out;
- int32_t op_errno = 0;
+ sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ local->cont.lookup.sources = sources;
+
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
+ ret = 0;
+out:
+ return ret;
+}
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ void *gfid_req = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+ uint64_t ctx = 0;
+ int32_t op_errno = 0;
+ int allow_sh = 0;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
local->op_ret = -1;
frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
goto out;
}
- loc_copy (&local->loc, loc);
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
- ret = inode_ctx_get (loc->inode, this, &ctx);
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
if (ret == 0) {
/* lookup is a revalidate */
- callback = afr_revalidate_lookup_cbk;
-
- local->cont.lookup.is_revalidate = _gf_true;
- local->read_child_index = afr_read_child (this,
- loc->inode);
+ local->read_child_index = afr_inode_get_read_ctx (this,
+ local->loc.inode,
+ NULL);
} else {
- callback = afr_fresh_lookup_cbk;
-
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
}
UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
}
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ if (NULL == local->child_up) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.lookup.xattrs = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
+ ret = afr_lookup_cont_init (local, priv->child_count);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
call_count = local->call_count;
-
if (local->call_count == 0) {
ret = -1;
op_errno = ENOTCONN;
@@ -1063,38 +2500,33 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- /* 3 = data+metadata+entry */
- }
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
+ &gfid_req);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
}
-
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
+ afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
+ &local->loc);
+ local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
}
-
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, callback, (void *) (long) i,
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
- loc, local->xattr_req);
+ &local->loc, local->xattr_req);
if (!--call_count)
break;
}
@@ -1102,7 +2534,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
ret = 0;
out:
- if (ret == -1)
+ if (ret)
AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
NULL, NULL, NULL, NULL);
@@ -1113,231 +2545,179 @@ out:
/* {{{ open */
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_private_t * priv = NULL;
-
- int ret = -1;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_private_t * priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t * fd_ctx = NULL;
VALIDATE_OR_GOTO (this->private, out);
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto unlock;
+ ret = __fd_ctx_get (fd, this, &ctx);
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ if (ret == 0)
+ goto out;
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->opened_on) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
- INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
}
-unlock:
- UNLOCK (&fd->lock);
+
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
out:
return ret;
}
-/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ int ret = -1;
- LOCK (&frame->lock);
+ LOCK (&fd->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
+ ret = __afr_fd_ctx_set (this, fd);
}
+ UNLOCK (&fd->lock);
- return 0;
+ return ret;
}
+/* {{{ flush */
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
if (local->success_count == 0) {
local->op_ret = op_ret;
}
local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd);
-
+ local->fd, NULL);
if (!--call_count)
break;
+
}
}
return 0;
}
-
int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1345,49 +2725,27 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
-
- op_ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -1403,24 +2761,24 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
int ret = 0;
ret = fd_ctx_get (fd, this, &ctx);
-
if (ret < 0)
goto out;
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ GF_FREE (fd_ctx->pre_op_done);
+
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ GF_FREE (fd_ctx->locked_on);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_acquired);
+
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
GF_FREE (fd_ctx);
}
@@ -1458,20 +2816,29 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
-
int child_index = (long) cookie;
int read_child = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -1483,13 +2850,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
local->success_count++;
@@ -1502,12 +2869,32 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino;
- local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino;
-
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -1516,16 +2903,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1534,19 +2918,20 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -1554,17 +2939,16 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1574,10 +2958,10 @@ out:
int32_t
afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -1595,7 +2979,7 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -1603,16 +2987,13 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1621,33 +3002,30 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fsyncdir_cbk,
priv->children[i],
priv->children[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -1658,18 +3036,20 @@ out:
int32_t
afr_xattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -1679,7 +3059,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.xattrop.xattr, xdata);
return 0;
}
@@ -1687,16 +3067,13 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1705,33 +3082,30 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_xattrop_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
- loc, optype, xattr);
+ loc, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1742,7 +3116,7 @@ out:
int32_t
afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -1752,8 +3126,12 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -1763,7 +3141,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.fxattrop.xattr, xdata);
return 0;
}
@@ -1771,16 +3149,13 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1789,33 +3164,30 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fxattrop_cbk,
priv->children[i],
priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
+ fd, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1824,11 +3196,10 @@ out:
int32_t
afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -1846,7 +3217,7 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -1854,16 +3225,14 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1872,45 +3241,42 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_inodelk_cbk,
priv->children[i],
priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -1928,7 +3294,7 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -1936,16 +3302,14 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1954,45 +3318,40 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_finodelk_cbk,
priv->children[i],
priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -2010,7 +3369,7 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2019,16 +3378,14 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2037,34 +3394,31 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_entrylk_cbk,
priv->children[i],
priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -2072,11 +3426,10 @@ out:
int32_t
afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -2094,7 +3447,7 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2103,16 +3456,14 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int ret = -1;
-
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -2121,44 +3472,40 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fentrylk_cbk,
priv->children[i],
priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
int32_t
afr_statfs_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+ struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
-
int call_count = 0;
LOCK (&frame->lock);
@@ -2187,7 +3534,7 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ &local->cont.statfs.buf, xdata);
return 0;
}
@@ -2195,16 +3542,14 @@ afr_statfs_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
afr_private_t * priv = NULL;
int child_count = 0;
afr_local_t * local = NULL;
int i = 0;
-
- int ret = -1;
+ int ret = -1;
int call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (this, out);
@@ -2214,15 +3559,13 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
priv = this->private;
child_count = priv->child_count;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- frame->local = local;
call_count = local->call_count;
for (i = 0; i < child_count; i++) {
@@ -2230,27 +3573,26 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
priv->children[i]->fops->statfs,
- loc);
+ loc, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
-
int call_count = -1;
local = frame->local;
@@ -2258,7 +3600,7 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lock, xdata);
return 0;
}
@@ -2269,8 +3611,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
-
- int i;
+ int i = 0;
int call_count = 0;
local = frame->local;
@@ -2281,7 +3622,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
return 0;
}
@@ -2295,7 +3636,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, NULL);
if (!--call_count)
break;
@@ -2308,13 +3649,13 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int child_index = -1;
/* int ret = 0; */
- int child_index = -1;
local = frame->local;
priv = this->private;
@@ -2343,12 +3684,12 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
} else {
/* locking has succeeded on all nodes that are up */
@@ -2366,7 +3707,7 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
}
return 0;
@@ -2375,16 +3716,13 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd,
- struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
int i = 0;
-
- int32_t op_ret = -1;
int32_t op_errno = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2392,17 +3730,18 @@ afr_lk (call_frame_t *frame, xlator_t *this,
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
sizeof (*local->cont.lk.locked_nodes),
gf_afr_mt_char);
if (!local->cont.lk.locked_nodes) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
op_errno = ENOMEM;
goto out;
}
@@ -2415,13 +3754,30 @@ afr_lk (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
- op_ret = 0;
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/*
+ * Free the afr context attached to 'inode' for this xlator, if any:
+ * first its fresh_children array, then the context itself.
+ * The return value of inode_ctx_get() is not checked; ctx_addr is
+ * pre-set to 0, so a lookup that stores nothing goes straight to out.
+ * Always returns 0.
+ */
+int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_get (inode, this, &ctx_addr);
+
+ if (!ctx_addr)
+ goto out;
+
+ ctx = (afr_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
return 0;
}
@@ -2440,41 +3796,23 @@ afr_priv_dump (xlator_t *this)
GF_ASSERT (priv);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
+ sprintf (key, "child_up[%d]", i);
gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
+ sprintf (key, "pending_key[%d]", i);
gf_proc_dump_write(key, "%s", priv->pending_key[i]);
}
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%u", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
+ gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+ gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+ gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
return 0;
}
@@ -2490,7 +3828,6 @@ static int
find_child_index (xlator_t *this, xlator_t *child)
{
afr_private_t *priv = NULL;
-
int i = -1;
priv = this->private;
@@ -2505,95 +3842,750 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
-
- int i = -1;
- int up_children = 0;
- int down_children = 0;
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
if (!priv)
return 0;
- child_up = priv->child_up;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ had_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ *
+ * Note: the child lookup below runs for every event type, before the
+ * switch. If find_child_index() returns a negative index we log and
+ * return ret, which is still -1 at that point, without propagating.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+ "from invalid subvolume");
+ goto out;
+ }
switch (event) {
case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
+ LOCK (&priv->lock);
+ {
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
+ priv->child_up[idx] = 1;
+
+ call_psh = 1;
+ up_child = idx;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
- /* temporarily
- afr_attempt_lock_recovery (this, i);
- */
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
- child_up[i] = 1;
+ break;
+ case GF_EVENT_CHILD_DOWN:
LOCK (&priv->lock);
{
- priv->up_count++;
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between that
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
}
UNLOCK (&priv->lock);
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
+ break;
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i] == 1)
- up_children++;
+ case GF_EVENT_CHILD_CONNECTING:
+ LOCK (&priv->lock);
+ {
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
+ break;
- default_notify (this, event, data);
- } else {
- default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
- }
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
+ break;
+ default:
+ propagate = 1;
break;
+ }
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i])
+ have_heard_from_all = 0;
+ }
- child_up[i] = 0;
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time.
+ Otherwise propagate CHILD_CONNECTING if some subvol's
+ last event was that, and CHILD_DOWN if none was.
+ */
+ event = GF_EVENT_CHILD_DOWN;
LOCK (&priv->lock);
{
- priv->down_count++;
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
}
UNLOCK (&priv->lock);
+ }
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
+
+out:
+ return ret;
+}
+
+/*
+ * Return the index of the first non-zero entry among the first
+ * child_count bytes of child_up, or -1 if all of them are zero.
+ * child_up is checked with GF_ASSERT before it is read.
+ */
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count)
+{
+        int idx = 0;
+
+        GF_ASSERT (child_up);
+
+        while (idx < child_count) {
+                if (child_up[idx])
+                        return idx;
+                idx++;
+        }
+
+        return -1;
+}
+
+/*
+ * Initialise the common per-fop fields of an afr_local_t.
+ *
+ * Sets op_ret/op_errno to -1/EUCLEAN, copies priv->child_up into a newly
+ * allocated local->child_up, stores the result of
+ * afr_up_children_count() on that copy in local->call_count, allocates
+ * the per-child child_errno and transaction.postop_piggybacked arrays,
+ * and clears append_write.
+ *
+ * Returns 0 on success. Returns -1 on failure and, when op_errno is not
+ * NULL, stores ENOMEM (an allocation failed) or ENOTCONN (call_count
+ * came out as 0) through it. Arrays allocated before the failing step
+ * stay attached to 'local'; nothing is freed here.
+ */
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+        /* errno reported on failure; every failure except "no child up"
+           is an allocation failure */
+        int32_t err = ENOMEM;
+
+        local->op_ret   = -1;
+        local->op_errno = EUCLEAN;
+
+        local->child_up = GF_CALLOC (priv->child_count,
+                                     sizeof (*local->child_up),
+                                     gf_afr_mt_char);
+        if (local->child_up == NULL)
+                goto fail;
+
+        memcpy (local->child_up, priv->child_up,
+                sizeof (*local->child_up) * priv->child_count);
+
+        local->call_count = afr_up_children_count (local->child_up,
+                                                   priv->child_count);
+        if (local->call_count == 0) {
+                gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+                err = ENOTCONN;
+                goto fail;
+        }
+
+        local->child_errno = GF_CALLOC (priv->child_count,
+                                        sizeof (*local->child_errno),
+                                        gf_afr_mt_int32_t);
+        if (local->child_errno == NULL)
+                goto fail;
+
+        local->transaction.postop_piggybacked =
+                GF_CALLOC (priv->child_count, sizeof (int),
+                           gf_afr_mt_int32_t);
+        if (local->transaction.postop_piggybacked == NULL)
+                goto fail;
+
+        local->append_write = _gf_false;
+
+        return 0;
+
+fail:
+        if (op_errno)
+                *op_errno = err;
+        return -1;
+}
+
+/*
+ * Initialise an internal-lock descriptor for child_count subvolumes.
+ *
+ * Allocates the zero-filled locked_nodes and lower_locked_nodes arrays,
+ * then sets lock_op_ret/lock_op_errno to -1/EUCLEAN and records lk_type.
+ *
+ * Returns 0 on success, or -ENOMEM when either allocation fails; in that
+ * case the remaining fields are left untouched.
+ */
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+                        transaction_lk_type_t lk_type)
+{
+        lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+                                      child_count, gf_afr_mt_char);
+        if (lk->locked_nodes == NULL)
+                return -ENOMEM;
+
+        lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
+                                            child_count, gf_afr_mt_char);
+        if (lk->lower_locked_nodes == NULL)
+                return -ENOMEM;
+
+        lk->lock_op_ret         = -1;
+        lk->lock_op_errno       = EUCLEAN;
+        lk->transaction_lk_type = lk_type;
+
+        return 0;
+}
+
+/*
+ * Free a matrix of m rows: every row pointer first, then the array of row
+ * pointers itself.  A NULL matrix is ignored.
+ */
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+        int row = 0;
+
+        if (matrix == NULL)
+                return;
+
+        while (row < m) {
+                GF_FREE (matrix[row]);
+                row++;
+        }
+
+        GF_FREE (matrix);
+}
+
+/*
+ * Allocate a zero-filled matrix of m rows by n int32_t columns.
+ *
+ * Returns the array of row pointers, or NULL if any allocation fails; rows
+ * allocated before the failure are released through afr_matrix_cleanup().
+ */
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+        int32_t **rows = NULL;
+        int       r    = 0;
+
+        rows = GF_CALLOC (sizeof (*rows), m, gf_afr_mt_int32_t);
+        if (rows) {
+                for (r = 0; r < m; r++) {
+                        rows[r] = GF_CALLOC (sizeof (*rows[r]), n,
+                                             gf_afr_mt_int32_t);
+                        if (!rows[r])
+                                break;
+                }
+                /* the loop ran to completion: every row was allocated */
+                if (r == m)
+                        return rows;
+        }
+
+        afr_matrix_cleanup (rows, m);
+        return NULL;
+}
+
+/*
+ * Initialise an inodelk descriptor: store the domain pointer (not copied)
+ * and allocate a zero-filled locked_nodes array of child_count entries.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+        lk->domain       = dom;
+        lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+                                      child_count, gf_afr_mt_char);
+
+        return (lk->locked_nodes == NULL) ? -ENOMEM : 0;
+}
+
+/*
+ * Set up the transaction-related members of an afr_local_t.
+ *
+ * Initialises local->internal_lock (plus inodelk[0], using this->name as
+ * the domain, for data and metadata transactions), decides whether the
+ * optimistic changelog applies (enabled in priv and every child up),
+ * records the first up child and allocates the eager_lock, fresh_children,
+ * pre_op, pending and txn_changelog structures.
+ *
+ * Returns 0 on success.  On failure returns the negative value reported by
+ * the lock initialisers or -ENOMEM for a failed allocation; anything
+ * allocated before the failure is not released here.
+ */
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+{
+        afr_private_t *priv     = this->private;
+        int            up_count = 0;
+        int            ret      = 0;
+
+        ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
+                                      AFR_TRANSACTION_LK);
+        if (ret < 0)
+                return ret;
+
+        if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+            (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+                ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+                                        this->name, priv->child_count);
+                if (ret < 0)
+                        return ret;
+        }
+
+        up_count = afr_up_children_count (local->child_up,
+                                          priv->child_count);
+        if (priv->optimistic_change_log && (up_count == priv->child_count))
+                local->optimistic_change_log = 1;
+
+        local->first_up_child = afr_first_up_child (local->child_up,
+                                                    priv->child_count);
+
+        local->transaction.eager_lock =
+                GF_CALLOC (sizeof (*local->transaction.eager_lock),
+                           priv->child_count,
+                           gf_afr_mt_int32_t);
+        if (!local->transaction.eager_lock)
+                return -ENOMEM;
+
+        local->fresh_children = afr_children_create (priv->child_count);
+        if (!local->fresh_children)
+                return -ENOMEM;
+
+        local->transaction.pre_op =
+                GF_CALLOC (sizeof (*local->transaction.pre_op),
+                           priv->child_count,
+                           gf_afr_mt_char);
+        if (!local->transaction.pre_op)
+                return -ENOMEM;
+
+        local->pending = afr_matrix_create (priv->child_count,
+                                            AFR_NUM_CHANGE_LOGS);
+        if (!local->pending)
+                return -ENOMEM;
+
+        local->transaction.txn_changelog =
+                afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+        if (!local->transaction.txn_changelog)
+                return -ENOMEM;
+
+        INIT_LIST_HEAD (&local->transaction.eager_locked);
+
+        return 0;
+}
+
+/*
+ * Mark every one of the child_count slots of fresh_children[] as unused
+ * by storing -1 in it.
+ *
+ * A NULL array or a non-positive child_count is a no-op.  The index is
+ * deliberately signed: with an unsigned index a negative child_count
+ * would be converted to a huge bound and the loop would write far past
+ * the end of the array.
+ */
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count)
+{
+        int32_t i = 0;
+
+        if (!fresh_children)
+                return;
+
+        for (i = 0; i < child_count; i++)
+                fresh_children[i] = -1;
+}
+
+/*
+ * Allocate a list of child_count child indices with every slot set to -1
+ * (the "unused" marker).  Returns NULL if the allocation fails.
+ */
+int32_t*
+afr_children_create (int32_t child_count)
+{
+        int32_t *list = NULL;
+        int      pos  = 0;
+
+        GF_ASSERT (child_count > 0);
+
+        list = GF_CALLOC (child_count, sizeof (*list),
+                          gf_afr_mt_int32_t);
+        if (list != NULL) {
+                for (pos = 0; pos < child_count; pos++)
+                        list[pos] = -1;
+        }
+
+        return list;
+}
+
+/*
+ * Add child to the -1 terminated list children[] (capacity child_count)
+ * unless it is already present.
+ *
+ * The list is scanned from the front; the child is stored in the first
+ * free (-1) slot.  If the list is full and does not contain the child,
+ * the assertion fires and the list is left unchanged; the function
+ * returns rather than relying on GF_ASSERT to stop execution, so nothing
+ * is ever written to children[child_count].
+ */
+void
+afr_children_add_child (int32_t *children, int32_t child,
+                        int32_t child_count)
+{
+        int i = 0;
+
+        for (i = 0; i < child_count; i++) {
+                if (children[i] == -1)
+                        break;          /* first free slot */
+                if (children[i] == child)
+                        return;         /* already listed */
+        }
+
+        GF_ASSERT (i < child_count);
+        if (i >= child_count)
+                return;                 /* list full: nowhere to store it */
+
+        children[i] = child;
+}
+
+/*
+ * Remove child from the -1 terminated list children[] (capacity
+ * child_count).  The entries after it are shifted one slot towards the
+ * front and the last slot is set to -1.  Nothing changes if the child is
+ * not in the list.
+ */
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+        int pos  = 0;
+        int last = child_count - 1;
+
+        GF_ASSERT ((child >= 0) && (child < child_count));
+
+        for (pos = 0; (pos < child_count) && (children[pos] != -1); pos++) {
+                if (children[pos] != child)
+                        continue;
+
+                /* close the gap unless the match is already the last slot */
+                if (pos != last)
+                        memmove (&children[pos], &children[pos + 1],
+                                 sizeof (*children) * (last - pos));
+                children[last] = -1;
+                break;
+        }
+}
+
+/*
+ * Count the entries at the front of children[] that precede the first -1,
+ * looking at no more than child_count slots.
+ */
+int
+afr_get_children_count (int32_t *children, unsigned int child_count)
+{
+        int used = 0;
+
+        while ((used < child_count) && (children[used] != -1))
+                used++;
+
+        return used;
+}
+
+/*
+ * Stamp the root of frame's call stack with LOW_PRIO_PROC_PID.
+ * NOTE(review): presumably other layers schedule calls carrying this pid
+ * at low priority -- confirm against the consumers of LOW_PRIO_PROC_PID.
+ */
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+        frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+/*
+ * Record in the AFR fd context that fd is open on subvolume index child.
+ *
+ * Makes sure the fd context exists (afr_fd_ctx_set), fetches it, marks
+ * opened_on[child] as AFR_FD_OPENED and, unless the fd's inode is a
+ * directory, stores flags in the context.
+ *
+ * Returns 0 on success, or the negative value from afr_fd_ctx_set() /
+ * fd_ctx_get() after logging an error.
+ */
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+                      int flags)
+{
+        afr_fd_ctx_t *fd_ctx = NULL;
+        uint64_t      raw    = 0;
+        int           ret    = 0;
+
+        GF_ASSERT (fd && fd->inode);
+
+        ret = afr_fd_ctx_set (this, fd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set fd ctx for fd=%p", fd);
+                return ret;
+        }
+
+        ret = fd_ctx_get (fd, this, &raw);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not get fd ctx for fd=%p", fd);
+                return ret;
+        }
+
+        /* the context is stored as an integer; turn it back into a pointer */
+        fd_ctx = (afr_fd_ctx_t *)(long) raw;
+        fd_ctx->opened_on[child] = AFR_FD_OPENED;
+        if (!IA_ISDIR (fd->inode->ia_type))
+                fd_ctx->flags = flags;
+
+        return 0;
+}
+
+/*
+ * Decide whether quorum is currently met.
+ *
+ * With a fixed priv->quorum_count the answer is
+ * up_count >= down_count + quorum_count.  With AFR_QUORUM_AUTO the
+ * threshold is child_count / 2 + 1; in addition, for an even child_count,
+ * a threshold of exactly child_count / 2 is accepted when the first child
+ * (child_up[0]) is up, so that only one such half can ever claim quorum.
+ *
+ * Returns _gf_false when priv fails validation.
+ */
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+        unsigned int needed = 0;
+
+        GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+        needed = priv->quorum_count;
+        if (needed != AFR_QUORUM_AUTO)
+                return (priv->up_count >= (priv->down_count + needed));
+
+        needed = priv->child_count / 2 + 1;
+        if (priv->up_count >= (priv->down_count + needed))
+                return _gf_true;
+
+        /* the tie-break below only exists for an even number of children */
+        if ((priv->child_count % 2) != 0)
+                goto out;
+
+        needed = priv->child_count / 2;
+        if ((priv->up_count >= (priv->down_count + needed)) &&
+            priv->child_up[0])
+                return _gf_true;
+
+out:
+        return _gf_false;
+}
+
+/*
+ * Release everything owned by an afr_private_t and then the structure
+ * itself.  A NULL priv is ignored.
+ *
+ * Drops the reference on root_inode, frees the self-heal daemon
+ * bookkeeping arrays, destroys the healed / heal_failed / split_brain
+ * event histories and the per-child statistics histories, frees the
+ * pending keys and child arrays and finally destroys the locks and the
+ * mutex.
+ */
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+        int i = 0;
+
+        if (!priv)
+                goto out;
+        inode_unref (priv->root_inode);
+        GF_FREE (priv->shd.pos);
+        GF_FREE (priv->shd.pending);
+        GF_FREE (priv->shd.inprogress);
+        /* NOTE: the individual entries of priv->shd.timer[] are not
+         * cancelled here; only the array itself is freed. */
+        GF_FREE (priv->shd.timer);
+
+        if (priv->shd.healed)
+                eh_destroy (priv->shd.healed);
+
+        if (priv->shd.heal_failed)
+                eh_destroy (priv->shd.heal_failed);
+
+        if (priv->shd.split_brain)
+                eh_destroy (priv->shd.split_brain);
+
+        /* The statistics array is allocated separately (see
+         * afr_initialise_statistics) and is NULL if that allocation never
+         * happened or failed, so it must be checked before indexing. */
+        if (priv->shd.statistics) {
+                for (i = 0; i < priv->child_count; i++)
+                {
+                        if (priv->shd.statistics[i])
+                                eh_destroy (priv->shd.statistics[i]);
+                }
+        }
+
+        GF_FREE (priv->shd.statistics);
+
+        GF_FREE (priv->shd.crawl_events);
+
+        GF_FREE (priv->last_event);
+        if (priv->pending_key) {
                 for (i = 0; i < priv->child_count; i++)
-                        if (child_up[i] == 0)
-                                down_children++;
+                        GF_FREE (priv->pending_key[i]);
+        }
+        GF_FREE (priv->pending_key);
+        GF_FREE (priv->children);
+        GF_FREE (priv->child_up);
+        LOCK_DESTROY (&priv->lock);
+        LOCK_DESTROY (&priv->read_child_lock);
+        pthread_mutex_destroy (&priv->mutex);
+        GF_FREE (priv);
+out:
+        return;
+}
- if (down_children == priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
+/*
+ * Return the number of nodes in the this->children linked list.
+ */
+int
+xlator_subvolume_count (xlator_t *this)
+{
+        xlator_list_t *node  = this->children;
+        int            count = 0;
-                                default_notify (this, event, data);
-                        } else {
-                                default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
+
+        while (node) {
+                count++;
+                node = node->next;
+        }
+        return count;
+}
+
+/*
+ * Tell whether an errno has been recorded for subvolume index child.
+ *
+ * Returns _gf_true when child_errno[child] is non-zero and _gf_false
+ * otherwise.  The result is normalised to the two boolean constants: the
+ * raw errno value (e.g. 2 for ENOENT) must not leak out through the
+ * gf_boolean_t return type, where a comparison against _gf_true would
+ * give the wrong answer.
+ */
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+        return (child_errno[child] != 0) ? _gf_true : _gf_false;
+}
+
+/*
+ * Logical negation of afr_is_errno_set(): true when no errno has been
+ * recorded for subvolume index child.
+ */
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+        if (afr_is_errno_set (child_errno, child))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/*
+ * Fill the pending matrix for a newly created entry.
+ *
+ * For every child index i (0 .. child_count - 1) for which
+ * is_pending (ctx, i) is true, the metadata changelog column of
+ * pending[i] is set to hton32 (1).  The same value is also stored in the
+ * entry column when buf describes a directory, or in the data column when
+ * it describes a regular file; other file types only get the metadata
+ * mark.
+ */
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+                                      gf_boolean_t (*is_pending) (int *, int),
+                                      int *ctx, struct iatt *buf,
+                                      unsigned int child_count)
+{
+        int meta_col = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+        int type_col = -1;      /* -1: no type-specific column applies */
+        int child    = 0;
+
+        if (IA_ISDIR (buf->ia_type))
+                type_col = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+        else if (IA_ISREG (buf->ia_type))
+                type_col = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+
+        for (child = 0; child < child_count; child++) {
+                if (!is_pending (ctx, child))
+                        continue;
+
+                pending[child][meta_col] = hton32 (1);
+                if (type_col != -1)
+                        pending[child][type_col] = hton32 (1);
         }
+}
- break;
+/*
+ * Return _gf_true only for an fd that has an inode, is not anonymous and
+ * whose inode carries a non-null gfid; _gf_false in every other case,
+ * including a NULL fd.
+ */
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+        if (!fd || !fd->inode)
+                return _gf_false;
+
+        if (fd_is_anonymous (fd) || uuid_is_null (fd->inode->gfid))
+                return _gf_false;
+
+        return _gf_true;
+}
- default:
- default_notify (this, event, data);
+/*
+ * Copy local->open_fd_count into the AFR inode context of the inode this
+ * frame operates on: the fd's inode when the local has an fd, otherwise
+ * local->loc.inode.  Does nothing when no inode is available.
+ *
+ * The update is made under inode->lock.
+ */
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        inode_t         *inode = NULL;
+        afr_inode_ctx_t *ctx   = NULL;
+
+        local = frame->local;
+
+        if (local->fd)
+                inode = local->fd->inode;
+        else
+                inode = local->loc.inode;
+
+        if (!inode)
+                return;
+
+        LOCK (&inode->lock);
+        {
+                ctx = __afr_inode_ctx_get (inode, this);
+                /* NOTE(review): __afr_inode_ctx_get is defined elsewhere;
+                 * guard against a NULL result (e.g. when the context
+                 * cannot be allocated) instead of dereferencing it
+                 * blindly -- confirm against its definition. */
+                if (ctx)
+                        ctx->open_fd_count = local->open_fd_count;
         }
+        UNLOCK (&inode->lock);
+}
- return 0;
+/*
+ * Allocate the self-heal daemon statistics held in this->private.
+ *
+ * Creates priv->shd.statistics (one slot per child), fills each slot with
+ * an event history of AFR_STATISTICS_HISTORY_SIZE entries whose elements
+ * are destroyed with _destroy_crawl_event_data, and allocates the
+ * per-child priv->shd.crawl_events array.
+ *
+ * Returns 0 on success, -1 as soon as any allocation fails; whatever was
+ * allocated up to that point is left in priv.
+ */
+int
+afr_initialise_statistics (xlator_t *this)
+{
+        afr_private_t       *priv        = this->private;
+        shd_crawl_event_t ***crawl_slot  = NULL;
+        eh_t                *brick_stats = NULL;
+        int                  nchildren   = 0;
+        int                  idx         = 0;
+
+        priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+                                          gf_common_mt_eh_t);
+        if (!priv->shd.statistics)
+                return -1;
+
+        nchildren = priv->child_count;
+        while (idx < nchildren) {
+                brick_stats = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+                                      _gf_false,
+                                      _destroy_crawl_event_data);
+                if (!brick_stats)
+                        return -1;
+                priv->shd.statistics[idx] = brick_stats;
+                idx++;
+        }
+
+        /* crawl_events is assigned through a casted pointer to the member */
+        crawl_slot  = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+        *crawl_slot = GF_CALLOC (sizeof(shd_crawl_event_t*),
+                                 priv->child_count,
+                                 gf_afr_mt_shd_crawl_event_t);
+
+        return (priv->shd.crawl_events) ? 0 : -1;
 }
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 59bd7872e..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -49,9 +40,9 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
-
int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed)
{
afr_local_t *local = NULL;
@@ -60,7 +51,7 @@ afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
afr_set_opendir_done (this, local->fd->inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -70,16 +61,19 @@ gf_boolean_t
__checksums_differ (uint32_t *checksum, int child_count,
unsigned char *child_up)
{
- int ret = _gf_false;
- int i = 0;
-
- uint32_t cksum;
-
- cksum = checksum[0];
+ int ret = _gf_false;
+ int i = 0;
+ uint32_t cksum = 0;
+ gf_boolean_t activate_check = _gf_false;
for (i = 0; i < child_count; i++) {
if (!child_up[i])
continue;
+ if (_gf_false == activate_check) {
+ cksum = checksum[i];
+ activate_check = _gf_true;
+ continue;
+ }
if (cksum != checksum[i]) {
ret = _gf_true;
@@ -96,40 +90,45 @@ __checksums_differ (uint32_t *checksum, int child_count,
int32_t
afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = 0;
-
- uint32_t entry_cksum;
-
- int call_count = 0;
- off_t last_offset = 0;
- char sh_type_str[256] = {0,};
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
+ int child_index = 0;
+ uint32_t entry_cksum = 0;
+ int call_count = 0;
+ off_t last_offset = 0;
+ inode_t *inode = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ inode = local->fd->inode;
child_index = (long) cookie;
if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to do opendir on %s",
+ local->loc.path, priv->children[child_index]->name);
local->op_ret = -1;
local->op_ret = op_errno;
goto out;
}
- if (op_ret == 0)
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: no entries found in %s",
+ local->loc.path, priv->children[child_index]->name);
goto out;
+ }
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
strlen (entry->d_name));
local->cont.opendir.checksum[child_index] ^= entry_cksum;
}
@@ -144,39 +143,30 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
+ local->fd, 131072, last_offset, NULL);
+
+ return 0;
out:
- if ((op_ret == 0) || (op_ret == -1)) {
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->need_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_NORMAL,
- "%s self-heal triggered. path: %s, "
- "reason: checksums of directory differ,"
- " forced merge option set",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this);
- } else {
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (__checksums_differ (local->cont.opendir.checksum,
+ priv->child_count,
+ local->child_up)) {
+
+ sh->do_entry_self_heal = _gf_true;
+ sh->forced_merge = _gf_true;
+
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
+ } else {
+ afr_set_opendir_done (this, inode);
+
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
}
}
@@ -187,11 +177,10 @@ out:
int
afr_examine_dir (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int i;
- int call_count = 0;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int call_count = 0;
local = frame->local;
priv = this->private;
@@ -200,7 +189,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
sizeof (*local->cont.opendir.checksum),
gf_afr_mt_int32_t);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -210,7 +199,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ local->fd, 131072, 0, NULL);
if (!--call_count)
break;
@@ -224,75 +213,77 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int32_t up_children_count = 0;
- int ret = -1;
-
- int call_count = -1;
+ int ret = -1;
+ int call_count = -1;
+ int32_t child_index = 0;
priv = this->private;
local = frame->local;
+ child_index = (long) cookie;
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
LOCK (&frame->lock);
{
- if (op_ret >= 0)
+ if (op_ret >= 0) {
local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
local->op_errno = op_errno;
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (local->op_ret == 0) {
+ if (local->op_ret != 0)
+ goto out;
- ret = afr_fd_ctx_set (this, local->fd);
+ if (!afr_is_opendir_done (this, local->fd->inode) &&
+ up_children_count > 1 && priv->entry_self_heal) {
+
+ /*
+ * This is the first opendir on this inode. We need
+ * to check if the directory's entries are the same
+ * on all subvolumes. This is needed in addition
+ * to regular entry self-heal because the readdir
+ * call is sent only to the first subvolume, and
+ * thus files that exist only there will never be healed
+ * otherwise (assuming changelog shows no anomalies).
+ */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "reading contents of directory %s looking for mismatch",
+ local->loc.path);
+
+ afr_examine_dir (frame, this);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -1;
- gf_log (this->name, GF_LOG_ERROR, " failed to "
- "set fd ctx for fd %p", local->fd);
- goto out;
- }
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
} else {
- out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ /* do the unwind */
+ goto out;
}
}
return 0;
+
+out:
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+
+ return 0;
}
@@ -302,15 +293,11 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
-
int child_count = 0;
int i = 0;
-
- int ret = -1;
- int call_count = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -320,37 +307,36 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
child_count = priv->child_count;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
loc_copy (&local->loc, loc);
- frame->local = local;
local->fd = fd_ref (fd);
call_count = local->call_count;
for (i = 0; i < child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
@@ -372,286 +358,95 @@ struct entry_name {
struct list_head list;
};
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
-{
- struct entry_name *e;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-
static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+afr_forget_entries (fd_t *fd)
{
- struct entry_name *n = NULL;
- gf_dirent_t * entry = NULL;
-
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
+ struct entry_name *entry = NULL;
+ struct entry_name *tmp = NULL;
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
ret = fd_ctx_get (fd, THIS, &ctx);
if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
+ gf_log (THIS->name, GF_LOG_INFO,
"could not get fd ctx for fd=%p", fd);
return;
}
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
+ list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
+ GF_FREE (entry->name);
+ list_del (&entry->list);
+ GF_FREE (entry);
}
}
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
{
- gf_dirent_t *entry, *tmp;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ list_del_init (&entry->list);
GF_FREE (entry);
}
}
-
- return offset;
-}
-
-
-static void
-afr_forget_entries (fd_t *fd)
-{
- struct entry_name *entry, *tmp;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
}
-
int32_t
afr_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = -1;
+ if (op_ret == -1)
+ goto out;
- priv = this->private;
local = frame->local;
- child_index = (long) cookie;
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
+ afr_readdir_filter_trash_dir (entries, local->fd);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
return 0;
}
int32_t
afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- ino_t inum = 0;
-
- int call_child = 0;
- int ret = 0;
+ afr_local_t *local = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
-
- int child_index = -1;
-
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- off_t offset = 0;
-
- priv = this->private;
- children = priv->children;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
-
- child_index = (long) cookie;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
- goto out;
- }
-
- call_child = ++child_index;
-
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
- entry->d_ino = inum;
- inum = afr_itransform (entry->d_stat.ia_ino,
- priv->child_count, child_index);
- entry->d_stat.ia_ino = inum;
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries from offset %"PRId64", child %s",
- offset, children[child_index]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
+ afr_readdir_filter_trash_dir (entries, local->fd);
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
return 0;
}
-
int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -660,85 +455,82 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
- frame->local = local;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
goto out;
}
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
-
- fd_ctx->last_tried = call_child;
- }
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdir, fd,
- size, offset);
+ size, offset, dict);
else
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdirp, fd,
- size, offset);
+ size, offset, dict);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 40c7b6aef..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,28 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
-
-int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index dceb059bb..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,28 +38,222 @@
#include "afr.h"
#include "afr-transaction.h"
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+{
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
+
+ return ret;
+}
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
- char *tmp = NULL;
+ afr_local_t *local = NULL;
- if (!child->parent) {
- loc_copy (parent, child);
- return;
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
+
+ local->cont.dir_fop.inode = inode;
+
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
}
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
+ local->op_errno = op_errno;
+}
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
+}
+
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
+
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
}
/* {{{ create */
@@ -78,7 +263,6 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -92,23 +276,14 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- unwind_buf->ia_ino = local->cont.create.ino;
-
- local->cont.create.preparent.ia_ino = local->cont.create.parent_ino;
- local->cont.create.postparent.ia_ino = local->cont.create.parent_ino;
-
AFR_STACK_UNWIND (create, main_frame,
local->op_ret, local->op_errno,
local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
}
return 0;
@@ -119,34 +294,24 @@ int
afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = 0;
-
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = -1;
local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
+ if (op_ret > -1) {
ret = afr_fd_ctx_set (this, fd);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not set ctx on fd=%p", fd);
@@ -157,7 +322,6 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ret = fd_ctx_get (fd, this, &ctx);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not get fd ctx for fd=%p", fd);
@@ -168,45 +332,17 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child_index] = 1;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.create.flags;
if (local->success_count == 0) {
- local->cont.create.buf = *buf;
-
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.create.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- local->success_count++;
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
}
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
unlock:
@@ -214,11 +350,8 @@ unlock:
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -229,14 +362,14 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -246,7 +379,7 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -254,8 +387,9 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
&local->loc,
local->cont.create.flags,
local->cont.create.mode,
+ local->umask,
local->cont.create.fd,
- local->cont.create.params);
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -283,16 +417,14 @@ afr_create_done (call_frame_t *frame, xlator_t *this)
int
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -300,22 +432,20 @@ afr_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(create,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -326,33 +456,49 @@ afr_create (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
if (params)
- local->cont.create.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.create.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_create_wind;
local->transaction.done = afr_create_done;
local->transaction.unwind = afr_create_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -368,8 +514,6 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
local = frame->local;
LOCK (&frame->lock);
@@ -382,22 +526,13 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
- unwind_buf->ia_ino = local->cont.mknod.ino;
-
- local->cont.mknod.preparent.ia_ino = local->cont.mknod.parent_ino;
- local->cont.mknod.postparent.ia_ino = local->cont.mknod.parent_ino;
-
AFR_STACK_UNWIND (mknod, main_frame,
local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -408,72 +543,25 @@ int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
+ int call_count = -1;
+ int child_index = -1;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mknod.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -484,14 +572,14 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -501,13 +589,14 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->mknod,
&local->loc, local->cont.mknod.mode,
local->cont.mknod.dev,
- local->cont.mknod.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -532,17 +621,15 @@ afr_mknod_done (call_frame_t *frame, xlator_t *this)
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -550,22 +637,20 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mknod,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -576,32 +661,48 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
+ local->umask = umask;
if (params)
- local->cont.mknod.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mknod.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
local->transaction.fop = afr_mknod_wind;
local->transaction.done = afr_mknod_done;
local->transaction.unwind = afr_mknod_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -618,8 +719,6 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
local = frame->local;
LOCK (&frame->lock);
@@ -632,22 +731,13 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- unwind_buf->ia_ino = local->cont.mkdir.ino;
-
- local->cont.mkdir.preparent.ia_ino = local->cont.mkdir.parent_ino;
- local->cont.mkdir.postparent.ia_ino = local->cont.mkdir.parent_ino;
-
AFR_STACK_UNWIND (mkdir, main_frame,
local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -658,73 +748,25 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
+ int call_count = -1;
+ int child_index = -1;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
-
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mkdir.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -735,14 +777,14 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -752,13 +794,14 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->mkdir,
&local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
}
@@ -782,19 +825,16 @@ afr_mkdir_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -802,22 +842,20 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mkdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -829,31 +867,47 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.mkdir.mode = mode;
+ local->umask = umask;
if (params)
- local->cont.mkdir.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.mkdir.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_MKDIR;
local->transaction.fop = afr_mkdir_wind;
local->transaction.done = afr_mkdir_done;
local->transaction.unwind = afr_mkdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -870,8 +924,6 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
local = frame->local;
LOCK (&frame->lock);
@@ -884,22 +936,13 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- unwind_buf->ia_ino = local->cont.link.ino;
-
- local->cont.link.preparent.ia_ino = local->cont.link.parent_ino;
- local->cont.link.postparent.ia_ino = local->cont.link.parent_ino;
-
AFR_STACK_UNWIND (link, main_frame,
local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -910,61 +953,25 @@ int
afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
+ int call_count = -1;
+ int child_index = -1;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -975,14 +982,14 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -992,12 +999,13 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->link,
&local->loc,
- &local->newloc);
+ &local->newloc, local->xdata_req);
if (!--call_count)
break;
@@ -1023,16 +1031,14 @@ afr_link_done (call_frame_t *frame, xlator_t *this)
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1040,25 +1046,25 @@ afr_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(link,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
LOCK (&priv->read_child_lock);
{
@@ -1067,30 +1073,41 @@ afr_link (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
- local->cont.link.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.link.parent_ino = newloc->parent->ino;
-
+ local->op = GF_FOP_LINK;
local->transaction.fop = afr_link_wind;
local->transaction.done = afr_link_done;
local->transaction.unwind = afr_link_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1107,8 +1124,6 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
local = frame->local;
LOCK (&frame->lock);
@@ -1121,22 +1136,13 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- unwind_buf->ia_ino = local->cont.symlink.ino;
-
- local->cont.symlink.preparent.ia_ino = local->cont.symlink.parent_ino;
- local->cont.symlink.postparent.ia_ino = local->cont.symlink.parent_ino;
-
AFR_STACK_UNWIND (symlink, main_frame,
local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1147,71 +1153,25 @@ int
afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
+ int call_count = -1;
+ int child_index = -1;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.symlink.buf = *buf;
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.symlink.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1222,14 +1182,14 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1239,14 +1199,15 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->symlink,
local->cont.symlink.linkpath,
&local->loc,
- local->cont.symlink.params);
+ local->umask,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1273,16 +1234,14 @@ afr_symlink_done (call_frame_t *frame, xlator_t *this)
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1290,22 +1249,20 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(symlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
@@ -1317,30 +1274,46 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->read_child_lock);
local->cont.symlink.linkpath = gf_strdup (linkpath);
+ local->umask = umask;
if (params)
- local->cont.symlink.params = dict_ref (params);
-
- if (loc->parent)
- local->cont.symlink.parent_ino = loc->parent->ino;
+ local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_SYMLINK;
local->transaction.fop = afr_symlink_wind;
local->transaction.done = afr_symlink_done;
local->transaction.unwind = afr_symlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1356,8 +1329,6 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
local = frame->local;
LOCK (&frame->lock);
@@ -1370,26 +1341,14 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- unwind_buf->ia_ino = local->cont.rename.ino;
-
- local->cont.rename.preoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.postoldparent.ia_ino = local->cont.rename.oldparent_ino;
- local->cont.rename.prenewparent.ia_ino = local->cont.rename.newparent_ino;
- local->cont.rename.postnewparent.ia_ino = local->cont.rename.newparent_ino;
-
AFR_STACK_UNWIND (rename, main_frame,
local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
+ NULL);
}
return 0;
@@ -1400,10 +1359,10 @@ int
afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
-
int call_count = -1;
int child_index = -1;
@@ -1415,38 +1374,22 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1457,14 +1400,14 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1474,13 +1417,13 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->rename,
&local->loc,
- &local->newloc);
+ &local->newloc, NULL);
if (!--call_count)
break;
}
@@ -1505,16 +1448,15 @@ afr_rename_done (call_frame_t *frame, xlator_t *this)
int
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1522,56 +1464,90 @@ afr_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rename,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->read_child_index = afr_read_child (this, oldloc->inode);
-
- local->cont.rename.ino = oldloc->inode->ino;
-
- if (oldloc->parent)
- local->cont.rename.oldparent_ino = oldloc->parent->ino;
- if (newloc->parent)
- local->cont.rename.newparent_ino = newloc->parent->ino;
+ local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->op = GF_FOP_RENAME;
local->transaction.fop = afr_rename_wind;
local->transaction.done = afr_rename_done;
local->transaction.unwind = afr_rename_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
}
return 0;
@@ -1599,13 +1575,11 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.unlink.preparent.ia_ino = local->cont.unlink.parent_ino;
- local->cont.unlink.postparent.ia_ino = local->cont.unlink.parent_ino;
-
AFR_STACK_UNWIND (unlink, main_frame,
local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1615,52 +1589,28 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
int
afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
if (child_index == local->read_child_index) {
local->read_child_returned = _gf_true;
}
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1671,14 +1621,14 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1688,12 +1638,13 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->unlink,
- &local->loc);
+ &local->loc, local->xflag,
+ local->xdata_req);
if (!--call_count)
break;
@@ -1719,16 +1670,14 @@ afr_unlink_done (call_frame_t *frame, xlator_t *this)
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1736,46 +1685,62 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(unlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- if (loc->parent)
- local->cont.unlink.parent_ino = loc->parent->ino;
-
+ local->op = GF_FOP_UNLINK;
local->transaction.fop = afr_unlink_wind;
local->transaction.done = afr_unlink_done;
local->transaction.unwind = afr_unlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
+ NULL, NULL, NULL);
}
return 0;
@@ -1805,13 +1770,11 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.rmdir.preparent.ia_ino = local->cont.rmdir.parent_ino;
- local->cont.rmdir.postparent.ia_ino = local->cont.rmdir.parent_ino;
-
AFR_STACK_UNWIND (rmdir, main_frame,
local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
return 0;
@@ -1821,53 +1784,36 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
int
afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
-
if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
+
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1878,14 +1824,14 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1895,12 +1841,13 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
+ &local->loc, local->cont.rmdir.flags,
+ NULL);
if (!--call_count)
break;
@@ -1926,16 +1873,15 @@ afr_rmdir_done (call_frame_t *frame, xlator_t *this)
int
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1943,47 +1889,71 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rmdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
local->cont.rmdir.flags = flags;
loc_copy (&local->loc, loc);
- if (loc->parent)
- local->cont.rmdir.parent_ino = loc->parent->ino;
-
+ local->op = GF_FOP_RMDIR;
local->transaction.fop = afr_rmdir_wind;
local->transaction.done = afr_rmdir_done;
local->transaction.unwind = afr_rmdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index e589efa37..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -23,38 +14,34 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 11db1e1d7..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,9 +35,6 @@
#include "compat-errno.h"
#include "compat.h"
-#include "afr.h"
-
-
/**
* Common algorithm for inode read calls:
*
@@ -61,16 +49,16 @@
int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -80,30 +68,28 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.access.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.access.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.access.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_access_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->access,
- &local->loc, local->cont.access.mask);
+ children[next_call_child],
+ children[next_call_child]->fops->access,
+ &local->loc, local->cont.access.mask,
+ NULL);
}
out:
if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
}
return 0;
@@ -111,19 +97,16 @@ out:
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
afr_local_t *local = NULL;
-
+ int32_t op_errno = 0;
int32_t read_child = -1;
-
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -134,40 +117,46 @@ afr_access (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- read_child = afr_read_child (this, loc->inode);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.access.last_tried = -1;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
- local->cont.access.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.access.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ local->cont.access.mask = mask;
STACK_WIND_COOKIE (frame, afr_access_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->access,
- loc, mask);
+ children[call_child],
+ children[call_child]->fops->access,
+ loc, mask, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
@@ -179,16 +168,16 @@ out:
int32_t
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -198,33 +187,27 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
local = frame->local;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.stat.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.stat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.stat.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_stat_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->stat,
- &local->loc);
+ children[next_call_child],
+ children[next_call_child]->fops->stat,
+ &local->loc, NULL);
}
out:
if (unwind) {
- if (buf)
- buf->ia_ino = local->cont.stat.ino;
-
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -232,18 +215,15 @@ out:
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int32_t read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
-
- int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -254,43 +234,42 @@ afr_stat (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- frame->local = local;
-
- read_child = afr_read_child (this, loc->inode);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.stat.last_tried = -1;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.stat.last_tried = call_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
}
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.stat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
loc_copy (&local->loc, loc);
- local->cont.stat.ino = loc->inode->ino;
-
STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->stat,
- loc);
+ loc, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -301,18 +280,18 @@ out:
/* {{{ fstat */
int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -322,33 +301,27 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.fstat.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.fstat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.fstat.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_fstat_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->fstat,
- local->fd);
+ children[next_call_child],
+ children[next_call_child]->fops->fstat,
+ local->fd, NULL);
}
out:
if (unwind) {
- if (buf)
- buf->ia_ino = local->cont.fstat.ino;
-
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
}
return 0;
@@ -357,17 +330,15 @@ out:
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
- int32_t read_child = -1;
-
- int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -379,44 +350,50 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ VALIDATE_OR_GOTO (fd->inode, out);
- frame->local = local;
+ AFR_SBRAIN_CHECK_FD (fd, out);
- VALIDATE_OR_GOTO (fd->inode, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- read_child = afr_read_child (this, fd->inode);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
- local->cont.fstat.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
- local->cont.fstat.last_tried = call_child;
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.fstat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- local->cont.fstat.ino = fd->inode->ino;
local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
+
STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->fstat,
- fd);
+ fd, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -428,16 +405,16 @@ out:
int32_t
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -447,33 +424,28 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.readlink.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.readlink.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.readlink.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readlink,
+ children[next_call_child],
+ children[next_call_child]->fops->readlink,
&local->loc,
- local->cont.readlink.size);
+ local->cont.readlink.size, NULL);
}
out:
if (unwind) {
- if (sbuf)
- sbuf->ia_ino = local->cont.readlink.ino;
-
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
+ xdata);
}
return 0;
@@ -482,17 +454,15 @@ out:
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
int call_child = 0;
afr_local_t *local = NULL;
-
+ int32_t op_errno = 0;
int32_t read_child = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -503,45 +473,45 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- frame->local = local;
-
- read_child = afr_read_child (this, loc->inode);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
-
- local->cont.readlink.last_tried = -1;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- } else {
- call_child = afr_first_up_child (priv);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.readlink.last_tried = call_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readlink.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
loc_copy (&local->loc, loc);
local->cont.readlink.size = size;
- local->cont.readlink.ino = loc->inode->ino;
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->readlink,
- loc, size);
+ children[call_child],
+ children[call_child]->fops->readlink,
+ loc, size, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -556,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -568,23 +538,23 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
void
__filter_xattrs (dict_t *dict)
{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
+ struct list_head keys = {0,};
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
INIT_LIST_HEAD (&keys);
@@ -605,16 +575,16 @@ __filter_xattrs (dict_t *dict)
int32_t
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
priv = this->private;
children = priv->children;
@@ -624,25 +594,23 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name,
+ NULL);
}
out:
@@ -650,38 +618,853 @@ out:
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
}
return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata) /* Unwinds the getxattr fop on frame,
+ * forwarding op_ret, op_errno, dict and xdata unchanged through
+ * AFR_STACK_UNWIND. Always returns 0. */
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); /* xdata is the argument this revision adds */
return 0;
}
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+                         xlator_t *this, int32_t op_ret, int32_t op_errno,
+                         dict_t *dict, dict_t *xdata)
+{
+        /*
+         * Per-child reply handler for an fgetxattr whose key is
+         * local->cont.getxattr.name.
+         *
+         * Under frame->lock it decrements local->call_count, records
+         * op_errno in local->child_errno[] for a child that answered with
+         * op_ret == -1, and stores a copy of the child's report string in
+         * local->dict under the child's xlator name.
+         *
+         * Once the count reaches zero, the collected reports are written
+         * into a 1024-byte summary by dict_serialize_value_with_delim with
+         * '\n' as the delimiter ("No locks cleared." is used instead when
+         * serz_len comes back as -1) and unwound as the fgetxattr result,
+         * with op_errno taken from afr_resultant_errno_get.
+         *
+         * Returns the status of the last dictionary operation performed.
+         *
+         * NOTE(review): no buffer length is passed to the serializer;
+         * confirm the combined reports cannot exceed lk_summary.
+         */
+        afr_private_t *priv             = this->private;
+        xlator_t     **subvols          = priv->children;
+        afr_local_t   *local            = frame->local;
+        long int       child_idx        = (long) cookie;
+        dict_t        *summary          = NULL;
+        char          *child_report     = NULL;
+        char           lk_summary[1024] = {0,};
+        int            serz_len         = 0;
+        int32_t        pending          = 0;
+        int            ret              = 0;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+                if (op_ret == -1)
+                        local->child_errno[child_idx] = op_errno;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+
+                if (local->dict) {
+                        ret = dict_get_str (dict, local->cont.getxattr.name,
+                                            &child_report);
+                        /* only copy the report when the lookup worked */
+                        if (!ret)
+                                ret = dict_set_dynstr (local->dict,
+                                                subvols[child_idx]->name,
+                                                gf_strdup (child_report));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        /* replies are still outstanding: nothing to unwind yet */
+        if (pending)
+                return ret;
+
+        summary = dict_new ();
+        if (!summary) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        ret = dict_serialize_value_with_delim (local->dict, lk_summary,
+                                               &serz_len, '\n');
+        if (ret) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error serializing dictionary");
+                goto unwind;
+        }
+
+        if (serz_len == -1)
+                snprintf (lk_summary, sizeof (lk_summary),
+                          "No locks cleared.");
+
+        ret = dict_set_dynstr (summary, local->cont.getxattr.name,
+                               gf_strdup (lk_summary));
+        if (ret) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error setting dictionary");
+        }
+
+unwind:
+        /* fold the latest errno into the per-child table before combining */
+        local->child_errno[child_idx] = op_errno;
+        op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+                                            priv->child_count);
+        AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, summary,
+                          xdata);
+
+        if (summary)
+                dict_unref (summary);
+
+        return ret;
+}
+
+/**
+ * Per-child reply handler for a getxattr whose key is
+ * local->cont.getxattr.name.
+ *
+ * Under frame->lock it decrements local->call_count, records op_errno in
+ * local->child_errno[] for a child that answered with op_ret == -1, and
+ * stores a copy of the child's report string in local->dict under the
+ * child's xlator name.
+ *
+ * Once the count reaches zero, the collected reports are written into a
+ * 1024-byte summary by dict_serialize_value_with_delim with '\n' as the
+ * delimiter ("No locks cleared." is used instead when serz_len comes back
+ * as -1) and unwound as the getxattr result, with op_errno taken from
+ * afr_resultant_errno_get.
+ *
+ * Returns the status of the last dictionary operation performed.
+ *
+ * NOTE(review): no buffer length is passed to the serializer; confirm the
+ * combined reports cannot exceed lk_summary.
+ */
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+                        xlator_t *this, int32_t op_ret, int32_t op_errno,
+                        dict_t *dict, dict_t *xdata)
+{
+        afr_private_t *priv             = this->private;
+        xlator_t     **subvols          = priv->children;
+        afr_local_t   *local            = frame->local;
+        long int       child_idx        = (long) cookie;
+        dict_t        *summary          = NULL;
+        char          *child_report     = NULL;
+        char           lk_summary[1024] = {0,};
+        int            serz_len         = 0;
+        int32_t        pending          = 0;
+        int            ret              = 0;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+                if (op_ret == -1)
+                        local->child_errno[child_idx] = op_errno;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+
+                if (local->dict) {
+                        ret = dict_get_str (dict, local->cont.getxattr.name,
+                                            &child_report);
+                        /* only copy the report when the lookup worked */
+                        if (!ret)
+                                ret = dict_set_dynstr (local->dict,
+                                                subvols[child_idx]->name,
+                                                gf_strdup (child_report));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        /* replies are still outstanding: nothing to unwind yet */
+        if (pending)
+                return ret;
+
+        summary = dict_new ();
+        if (!summary) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        ret = dict_serialize_value_with_delim (local->dict, lk_summary,
+                                               &serz_len, '\n');
+        if (ret) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error serializing dictionary");
+                goto unwind;
+        }
+
+        if (serz_len == -1)
+                snprintf (lk_summary, sizeof (lk_summary),
+                          "No locks cleared.");
+
+        ret = dict_set_dynstr (summary, local->cont.getxattr.name,
+                               gf_strdup (lk_summary));
+        if (ret) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Error setting dictionary");
+        }
+
+unwind:
+        /* fold the latest errno into the per-child table before combining */
+        local->child_errno[child_idx] = op_errno;
+        op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+                                            priv->child_count);
+        AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, summary, xdata);
+
+        if (summary)
+                dict_unref (summary);
+
+        return ret;
+}
+
+/**
+ * getxattr callback for node-uuid queries; children are tried one at a time.
+ *
+ * The cookie carries the index of the child that has just replied. If that
+ * child failed (op_ret == -1) and a following index exists, the same request
+ * is wound to the next child with the incremented index as cookie. In every
+ * other case (a reply other than -1, or the index reaching
+ * priv->child_count) the latest op_ret, op_errno and dict are unwound, with
+ * NULL passed in place of xdata.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        afr_private_t *priv     = this->private;
+        xlator_t     **subvols  = priv->children;
+        afr_local_t   *local    = frame->local;
+        int            next_idx = 0;
+
+        if (op_ret != -1)
+                goto reply;
+
+        /* the child after the one that just failed; none left => give up */
+        next_idx = (int) ((long) cookie) + 1;
+        if (next_idx == priv->child_count)
+                goto reply;
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "op_ret (-1): Re-querying afr-child (%d/%d)",
+                next_idx, priv->child_count);
+
+        STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+                           (void *) (long) next_idx,
+                           subvols[next_idx],
+                           subvols[next_idx]->fops->getxattr,
+                           &local->loc,
+                           local->cont.getxattr.name,
+                           NULL);
+        return 0;
+
+reply:
+        AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+                          NULL);
+        return 0;
+}
+
+/**
+ * Per-child getxattr callback that merges GF_XATTR_LOCKINFO_KEY replies.
+ *
+ * For every reply, local->call_count is decremented under frame->lock. For a
+ * reply with op_ret >= 0, the serialized value found under
+ * GF_XATTR_LOCKINFO_KEY is unserialized and copied into local->dict, and
+ * xdata is copied into local->xdata_rsp (both dictionaries are created on
+ * first use).
+ *
+ * When the count reaches zero, local->dict is serialized into a fresh buffer,
+ * stored in a new dictionary under GF_XATTR_LOCKINFO_KEY and unwound together
+ * with local->xdata_rsp. Allocation and serialization failures in this final
+ * step are unwound as op_ret == -1 with a matching op_errno. If the
+ * serialized length is not positive, the new dictionary is unwound empty with
+ * the op_ret/op_errno this callback holds at that point.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+                           xlator_t *this, int32_t op_ret, int32_t op_errno,
+                           dict_t *dict, dict_t *xdata)
+{
+        int          call_cnt     = 0, len = 0;
+        char        *lockinfo_buf = NULL;
+        dict_t      *lockinfo     = NULL, *newdict = NULL;
+        afr_local_t *local        = NULL;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                call_cnt = --local->call_count;
+
+                if ((op_ret < 0) || (!dict && !xdata)) {
+                        goto unlock;
+                }
+
+                if (xdata) {
+                        if (!local->xdata_rsp) {
+                                local->xdata_rsp = dict_new ();
+                                if (!local->xdata_rsp) {
+                                        local->op_ret = -1;
+                                        local->op_errno = ENOMEM;
+                                        goto unlock;
+                                }
+                        }
+                }
+
+                if (!dict) {
+                        goto unlock;
+                }
+
+                op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+                                               (void **)&lockinfo_buf, &len);
+
+                if (!lockinfo_buf) {
+                        goto unlock;
+                }
+
+                if (!local->dict) {
+                        local->dict = dict_new ();
+                        if (!local->dict) {
+                                local->op_ret = -1;
+                                local->op_errno = ENOMEM;
+                                goto unlock;
+                        }
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        if (lockinfo_buf != NULL) {
+                lockinfo = dict_new ();
+                if (lockinfo == NULL) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        op_ret = dict_unserialize (lockinfo_buf, len,
+                                                   &lockinfo);
+
+                        if (lockinfo && local->dict) {
+                                dict_copy (lockinfo, local->dict);
+                        }
+                }
+        }
+
+        if (xdata && local->xdata_rsp) {
+                dict_copy (xdata, local->xdata_rsp);
+        }
+
+        if (!call_cnt) {
+                newdict = dict_new ();
+                if (!newdict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        /* the unwind reports op_ret/op_errno, not local-> */
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* <= 0 also covers a negative (error) length, matching
+                 * afr_fgetxattr_lockinfo_cbk */
+                len = dict_serialized_length (local->dict);
+                if (len <= 0) {
+                        goto unwind;
+                }
+
+                lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+                if (!lockinfo_buf) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                op_ret = dict_serialize (local->dict, lockinfo_buf);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        /* never hand an unfilled buffer to the caller */
+                        GF_FREE (lockinfo_buf);
+                        goto unwind;
+                }
+
+                op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+                                          (void *)lockinfo_buf, len);
+                if (op_ret < 0) {
+                        /* NOTE(review): ownership of lockinfo_buf when
+                         * dict_set_dynptr fails is not visible here, so it
+                         * is deliberately not freed; confirm. */
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        goto unwind;
+                }
+
+        unwind:
+                AFR_STACK_UNWIND (getxattr, frame, op_ret,
+                                  op_errno, newdict,
+                                  local->xdata_rsp);
+
+                /* drop the reference taken by dict_new, as the clrlk and
+                 * pathinfo callbacks do for their result dictionaries */
+                if (newdict)
+                        dict_unref (newdict);
+        }
+
+        /* lockinfo stays NULL when this reply carried no lockinfo value */
+        if (lockinfo)
+                dict_unref (lockinfo);
+
+        return 0;
+}
+
+/**
+ * Per-child fgetxattr callback that merges GF_XATTR_LOCKINFO_KEY replies.
+ *
+ * For every reply, local->call_count is decremented under frame->lock. For a
+ * reply with op_ret >= 0, the serialized value found under
+ * GF_XATTR_LOCKINFO_KEY is unserialized and copied into local->dict, and
+ * xdata is copied into local->xdata_rsp (both dictionaries are created on
+ * first use).
+ *
+ * When the count reaches zero, local->dict is serialized into a fresh buffer,
+ * stored in a new dictionary under GF_XATTR_LOCKINFO_KEY and unwound together
+ * with local->xdata_rsp. Allocation and serialization failures in this final
+ * step are unwound as op_ret == -1 with a matching op_errno. If the
+ * serialized length is not positive, the new dictionary is unwound empty with
+ * the op_ret/op_errno this callback holds at that point.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        int          call_cnt     = 0, len = 0;
+        char        *lockinfo_buf = NULL;
+        dict_t      *lockinfo     = NULL, *newdict = NULL;
+        afr_local_t *local        = NULL;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                call_cnt = --local->call_count;
+
+                if ((op_ret < 0) || (!dict && !xdata)) {
+                        goto unlock;
+                }
+
+                if (xdata) {
+                        if (!local->xdata_rsp) {
+                                local->xdata_rsp = dict_new ();
+                                if (!local->xdata_rsp) {
+                                        local->op_ret = -1;
+                                        local->op_errno = ENOMEM;
+                                        goto unlock;
+                                }
+                        }
+                }
+
+                if (!dict) {
+                        goto unlock;
+                }
+
+                op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+                                               (void **)&lockinfo_buf, &len);
+
+                if (!lockinfo_buf) {
+                        goto unlock;
+                }
+
+                if (!local->dict) {
+                        local->dict = dict_new ();
+                        if (!local->dict) {
+                                local->op_ret = -1;
+                                local->op_errno = ENOMEM;
+                                goto unlock;
+                        }
+                }
+        }
+unlock:
+        UNLOCK (&frame->lock);
+
+        if (lockinfo_buf != NULL) {
+                lockinfo = dict_new ();
+                if (lockinfo == NULL) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        op_ret = dict_unserialize (lockinfo_buf, len,
+                                                   &lockinfo);
+
+                        if (lockinfo && local->dict) {
+                                dict_copy (lockinfo, local->dict);
+                        }
+                }
+        }
+
+        if (xdata && local->xdata_rsp) {
+                dict_copy (xdata, local->xdata_rsp);
+        }
+
+        if (!call_cnt) {
+                newdict = dict_new ();
+                if (!newdict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        /* the unwind reports op_ret/op_errno, not local-> */
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                /* nothing to serialize, or the length query failed */
+                len = dict_serialized_length (local->dict);
+                if (len <= 0) {
+                        goto unwind;
+                }
+
+                lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+                if (!lockinfo_buf) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                op_ret = dict_serialize (local->dict, lockinfo_buf);
+                if (op_ret < 0) {
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        /* never hand an unfilled buffer to the caller */
+                        GF_FREE (lockinfo_buf);
+                        goto unwind;
+                }
+
+                op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+                                          (void *)lockinfo_buf, len);
+                if (op_ret < 0) {
+                        /* NOTE(review): ownership of lockinfo_buf when
+                         * dict_set_dynptr fails is not visible here, so it
+                         * is deliberately not freed; confirm. */
+                        local->op_ret = -1;
+                        local->op_errno = -op_ret;
+                        op_errno = -op_ret;
+                        op_ret = -1;
+                        goto unwind;
+                }
+
+        unwind:
+                AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+                                  op_errno, newdict,
+                                  local->xdata_rsp);
+
+                /* drop the reference taken by dict_new, as the clrlk and
+                 * pathinfo callbacks do for their result dictionaries */
+                if (newdict)
+                        dict_unref (newdict);
+        }
+
+        /* lockinfo stays NULL when this reply carried no lockinfo value */
+        if (lockinfo)
+                dict_unref (lockinfo);
+
+        return 0;
+}
+
+/**
+ * Per-child fgetxattr callback that assembles the aggregated pathinfo value.
+ *
+ * For each reply with a dict and op_ret >= 0, a copy of the value stored
+ * under local->cont.getxattr.name is saved in local->dict under the key
+ * "<name>-<child index>", and its length (plus one for a separator) is added
+ * to local->cont.getxattr.xattr_len. All of this happens under frame->lock,
+ * together with the decrement of local->call_count.
+ *
+ * When the count reaches zero, the saved values are joined with ' ' behind
+ * the prefix "(<" AFR_PATHINFO_HEADER this->name "> ", closed with ')', and
+ * unwound in a new dictionary under local->cont.getxattr.name. If nothing
+ * was collected or an allocation fails, the unwind carries a NULL or empty
+ * dictionary together with the op_ret/op_errno of the last reply.
+ *
+ * Returns -1 without unwinding when frame, frame->local or this is NULL;
+ * otherwise the status of the last dictionary operation performed.
+ */
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret, int32_t op_errno,
+                            dict_t *dict, dict_t *xdata)
+{
+        afr_local_t *local           = NULL;
+        int32_t      callcnt         = 0;
+        int          ret             = 0;
+        char        *xattr           = NULL;
+        char        *xattr_serz      = NULL;
+        char         xattr_cky[1024] = {0,};
+        dict_t      *nxattr          = NULL;
+        long         cky             = 0;
+        int32_t      padding         = 0;
+        int32_t      tlen            = 0;
+
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+                /* there is no lock to release and no local to read, so the
+                 * shared exit path below must not be entered */
+                return -1;
+        }
+
+        local = frame->local;
+        cky = (long) cookie;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (!dict || (op_ret < 0))
+                        goto out;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+
+                if (local->dict) {
+                        ret = dict_get_str (dict,
+                                            local->cont.getxattr.name,
+                                            &xattr);
+                        if (ret)
+                                goto out;
+
+                        xattr = gf_strdup (xattr);
+                        if (!xattr) {
+                                /* strlen () below needs a real string */
+                                ret = -1;
+                                goto out;
+                        }
+
+                        (void)snprintf (xattr_cky, sizeof (xattr_cky),
+                                        "%s-%ld",
+                                        local->cont.getxattr.name, cky);
+                        ret = dict_set_dynstr (local->dict,
+                                               xattr_cky, xattr);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Cannot set xattr cookie key");
+                                goto out;
+                        }
+
+                        local->cont.getxattr.xattr_len
+                                += strlen (xattr) + 1;
+                }
+        }
+out:
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                if (!local->cont.getxattr.xattr_len)
+                        goto unwind;
+
+                nxattr = dict_new ();
+                if (!nxattr)
+                        goto unwind;
+
+                /* length of the "(<" HEADER name "> " prefix: 2 + 2 fixed
+                 * characters around the header and the xlator name */
+                padding += strlen (this->name)
+                        + strlen (AFR_PATHINFO_HEADER) + 4;
+                /* two more bytes for the closing ')' and the terminator */
+                local->cont.getxattr.xattr_len += (padding + 2);
+
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+                                        sizeof (char), gf_common_mt_char);
+
+                if (!xattr_serz)
+                        goto unwind;
+
+                /* the xlator info */
+                (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+                                this->name);
+
+                /* actual series of pathinfo */
+                ret = dict_serialize_value_with_delim (local->dict,
+                                                       xattr_serz
+                                                       + strlen (xattr_serz),
+                                                       &tlen, ' ');
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+                                " dictionary");
+                        /* the buffer was never handed to nxattr */
+                        GF_FREE (xattr_serz);
+                        goto unwind;
+                }
+
+                /* closing part */
+                *(xattr_serz + padding + tlen) = ')';
+                *(xattr_serz + padding + tlen + 1) = '\0';
+
+                ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+                                       xattr_serz);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+                                " key in dict");
+
+        unwind:
+                AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+                                  xdata);
+
+                if (nxattr)
+                        dict_unref (nxattr);
+        }
+
+        return ret;
+}
+
+/*
+ * Per-child callback for a getxattr that was wound to every child (see
+ * afr_getxattr_frm_all_children).  Each successful reply is stored in
+ * local->dict under "<name>-<child index>"; when the last reply arrives the
+ * stored values are serialized into one "(<HEADER + xlator name> v1 v2 ...)"
+ * string and unwound under the requested key.
+ *
+ * frame/this must be valid; cookie carries the child index.  Returns the
+ * status of the last dict operation performed.
+ */
+int32_t
+afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t * local = NULL;
- xlator_list_t * trav = NULL;
- xlator_t ** sub_volumes= NULL;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ /* "out" unlocks frame->lock and then reads local, so it must
+ * not be reached when frame or frame->local is missing */
+ return ret;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
+ }
+ }
+ out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ /* the buffer was never handed to nxattr; release it here */
+ GF_FREE (xattr_serz);
+ goto unwind;
+ }
- int read_child = -1;
- int i = 0;
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+/*
+ * dict_foreach() visitor used while aggregating stime replies: keys matching
+ * GF_XATTR_STIME_PATTERN are handed to gf_get_min_stime() together with the
+ * aggregate dict passed in data; every other key is ignored.
+ *
+ * Returns 0 for non-matching keys, otherwise gf_get_min_stime()'s result.
+ */
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+        /* not an stime xattr: nothing to aggregate */
+        if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) != 0)
+                return 0;
+
+        return gf_get_min_stime (THIS, data, key, value);
+}
+
+/*
+ * Per-child callback for stime xattr requests (chosen by
+ * afr_is_special_xattr).  The first successful reply seeds local->dict with a
+ * copy of the child's dict; later successful replies are merged into it via
+ * afr_aggregate_stime_xattr.  A failed reply only records its errno.  The
+ * frame is unwound once the last outstanding reply has been counted.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+                               dict_t *dict, dict_t *xdata)
+{
+        afr_local_t *local   = NULL;
+        int32_t      pending = 0;
+
+        if (!frame || !frame->local || !this) {
+                gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+                return 0;
+        }
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (!dict || (op_ret < 0)) {
+                        /* remember why this child failed */
+                        local->op_errno = op_errno;
+                } else {
+                        if (!local->dict)
+                                local->dict = dict_copy_with_ref (dict, NULL);
+                        else
+                                dict_foreach (dict, afr_aggregate_stime_xattr,
+                                              local->dict);
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        /* last reply in: hand the aggregate back to the caller */
+        if (pending == 0) {
+                AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+                                  local->op_errno, local->dict, xdata);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Decide whether an xattr key needs to be answered by querying every child.
+ *
+ * name          key being requested; NULL (no key given) is never special
+ * cbk           out: callback to use when the key is special
+ * is_fgetxattr  selects the fgetxattr flavour of the callback where the two
+ *               fops have separate ones
+ *
+ * Returns _gf_true and sets *cbk for pathinfo, clrlk, lockinfo and stime
+ * keys; returns _gf_false (leaving *cbk untouched) otherwise.
+ */
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+                      gf_boolean_t is_fgetxattr)
+{
+        gf_boolean_t is_spl = _gf_true;
+
+        GF_ASSERT (cbk);
+        /* afr_fgetxattr() passes name through unchecked, so a NULL name must
+         * be rejected here before it reaches strcmp()/strncmp()/fnmatch() */
+        if (!cbk || !name) {
+                is_spl = _gf_false;
+                goto out;
+        }
+
+        if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+                if (is_fgetxattr) {
+                        *cbk = afr_fgetxattr_pathinfo_cbk;
+                } else {
+                        *cbk = afr_getxattr_pathinfo_cbk;
+                }
+        } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+                             strlen (GF_XATTR_CLRLK_CMD))) {
+                if (is_fgetxattr) {
+                        *cbk = afr_fgetxattr_clrlk_cbk;
+                } else {
+                        *cbk = afr_getxattr_clrlk_cbk;
+                }
+        } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+                             strlen (GF_XATTR_LOCKINFO_KEY))) {
+                if (is_fgetxattr) {
+                        *cbk = afr_fgetxattr_lockinfo_cbk;
+                } else {
+                        *cbk = afr_getxattr_lockinfo_cbk;
+                }
+        } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+                /* one callback serves both fops for stime keys */
+                *cbk = afr_common_getxattr_stime_cbk;
+        } else {
+                is_spl = _gf_false;
+        }
+
+out:
+        return is_spl;
+}
+
+/*
+ * Wind a getxattr for name on loc to every child of this translator.
+ * The child index travels as the wind cookie and cbk receives each reply;
+ * local->call_count is primed with the number of children first.
+ */
+static void
+afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+                               const char *name, loc_t *loc,
+                               fop_getxattr_cbk_t cbk)
+{
+        afr_private_t  *priv     = this->private;
+        afr_local_t    *local    = frame->local;
+        xlator_t      **children = priv->children;
+        int             i        = 0;
+
+        /* one reply is expected per child */
+        local->call_count = priv->child_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                STACK_WIND_COOKIE (frame, cbk,
+                                   (void *) (long) i,
+                                   children[i], children[i]->fops->getxattr,
+                                   loc, name, NULL);
+        }
+}
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -692,107 +1475,319 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
+ goto out;
+ }
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
- op_errno = ENODATA;
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
+ op_errno = EINVAL;
goto out;
}
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with a special key (pathinfo, clrlk,
+ * lockinfo or stime) then we collect information from all children
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_frm_all_children (this, frame, name,
+ loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
*(sub_volumes + i) = trav->xlator;
+
}
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
sub_volumes,
priv->child_count,
- MARKER_UUID_TYPE,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
-
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
op_errno = EINVAL;
goto out;
}
return 0;
}
+ }
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+no_name:
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->marker.call_count = priv->child_count;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->getxattr,
+ loc, name, xdata);
- *(sub_volumes + i) = trav->xlator;
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- }
+/* {{{ fgetxattr */
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- op_errno = EINVAL;
- goto out;
- }
- return 0;
- }
- }
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1; /* cleared only when the fop is re-wound to another child */
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ read_child = (long) cookie; /* child index that was passed as the wind cookie */
+
+ if (op_ret == -1) { /* this child failed: look for another child to ask */
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0) /* nobody left: unwind with this child's error */
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) read_child, /* cookie stays read_child, not next_call_child */
+ children[next_call_child],
+ children[next_call_child]->fops->fgetxattr,
+ local->fd,
+ local->cont.getxattr.name,
+ NULL); /* the retry is sent without xdata */
}
- read_child = afr_read_child (this, loc->inode);
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict) /* only a successful reply's dict is filtered */
+ __filter_xattrs (dict);
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
- local->cont.getxattr.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
+ return 0;
+}
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+int32_t
+afr_fgetxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata); /* thin wrapper: arguments are forwarded unchanged */
+ return 0; /* always 0 */
+}
- local->cont.getxattr.last_tried = call_child;
+/*
+ * Wind an fgetxattr for name on fd to every child of this translator.
+ * The child index travels as the wind cookie and cbk receives each reply;
+ * local->call_count is primed with the number of children first.
+ */
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ xlator_t **children = priv->children;
+ int i = 0;
+
+ /* one reply is expected per child */
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
}
+}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+/*
+ * fgetxattr fop entry point.
+ *
+ * Special keys (see afr_is_special_xattr) are wound to every child and
+ * aggregated by their callback; any other key, or no key at all, is wound to
+ * a single child chosen from the inode's read context, with
+ * afr_fgetxattr_cbk handling fail-over.
+ *
+ * Always returns 0; failures are reported by unwinding the frame with
+ * op_ret -1 and the relevant errno.
+ */
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ frame->local = local;
+
+ op_ret = afr_local_init (local, priv, &op_errno);
+ if (op_ret < 0) {
+ /* keep the errno reported through &op_errno, as afr_getxattr
+ * and afr_readv do, and make sure "out" unwinds */
+ op_ret = -1;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ /* Special keys (pathinfo, clrlk, lockinfo, stime) are answered by
+ * collecting information from all children.  name may be NULL here,
+ * in which case there is nothing special to look for.
+ */
+ if (name && afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ /* op_ret is 0 after a successful afr_local_init(); without
+ * this the frame would never be unwound */
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
(void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name);
+ children[call_child],
+ children[call_child]->fops->fgetxattr,
+ fd, name, xdata);
op_ret = 0;
out:
if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
}
return 0;
}
@@ -818,16 +1813,16 @@ int32_t
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t read_child = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -843,39 +1838,31 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
read_child = (long) cookie;
if (op_ret == -1) {
- retry:
- last_tried = local->cont.readv.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
+ last_index = &local->cont.readv.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
goto out;
- }
- this_try = ++local->cont.readv.last_tried;
-
- if (this_try == read_child) {
- /*
- skip the read child since if we are here
- we must have already tried that child
- */
- goto retry;
- }
unwind = 0;
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readv,
+ children[next_call_child],
+ children[next_call_child]->fops->readv,
local->fd, local->cont.readv.size,
- local->cont.readv.offset);
+ local->cont.readv.offset,
+ local->cont.readv.flags,
+ NULL);
}
out:
if (unwind) {
- if (buf && local)
- buf->ia_ino = local->cont.readv.ino;
-
AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
}
return 0;
@@ -884,17 +1871,15 @@ out:
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
xlator_t ** children = NULL;
-
- int32_t read_child = -1;
int call_child = 0;
-
- int32_t op_ret = -1;
int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -904,50 +1889,50 @@ afr_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- frame->local = local;
+ AFR_SBRAIN_CHECK_FD (fd, out);
- read_child = afr_read_child (this, fd->inode);
-
- if ((read_child >= 0) && (priv->child_up[read_child])) {
- call_child = read_child;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- /*
- if read fails from the read child, we try
- all children starting with the first one
- */
- local->cont.readv.last_tried = -1;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.readv.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readv.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
local->fd = fd_ref (fd);
- local->cont.readv.ino = fd->inode->ino;
local->cont.readv.size = size;
local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+
+ afr_open_fd_fix (fd, this);
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
+ if (ret < 0) {
+ AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
+ NULL, NULL);
}
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index acc814fb7..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 669a51d75..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,52 +37,155 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
+
+/*
+ * Record one child's reply to an inode-write fop in the frame's local.
+ *
+ * On failure the child's errno is stored in local->child_errno[] and
+ * local->op_errno, and the child is reported through
+ * afr_transaction_fop_failed() unless the fop is a truncate/ftruncate that
+ * failed with EFBIG.  On success the reply becomes the staged result if it is
+ * the first success or comes from read_child, and success_count is bumped.
+ */
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+                       xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+                       struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t *local = frame->local;
+
+        if (afr_fop_failed (*op_ret, *op_errno)) {
+                local->child_errno[child_index] = *op_errno;
+
+                /* EFBIG from (f)truncate is the only failure not reported */
+                if (((local->op != GF_FOP_TRUNCATE) &&
+                     (local->op != GF_FOP_FTRUNCATE)) ||
+                    (*op_errno != EFBIG))
+                        afr_transaction_fop_failed (frame, this, child_index);
+
+                local->op_errno = *op_errno;
+                return;
+        }
+
+        if ((local->success_count == 0) || (read_child == child_index)) {
+                local->op_ret = *op_ret;
+                if (prebuf)
+                        local->cont.inode_wfop.prebuf = *prebuf;
+                if (postbuf)
+                        local->cont.inode_wfop.postbuf = *postbuf;
+        }
+
+        local->success_count++;
+}
/* {{{ writev */
-int
+/*
+ * Copy the writev result (op_ret, op_errno and the pre/post iatts) from
+ * src_frame's local to dst_frame's local.
+ */
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+        afr_local_t *from = src_frame->local;
+        afr_local_t *to   = dst_frame->local;
+
+        to->op_ret   = from->op_ret;
+        to->op_errno = from->op_errno;
+        to->cont.inode_wfop.prebuf  = from->cont.inode_wfop.prebuf;
+        to->cont.inode_wfop.postbuf = from->cont.inode_wfop.postbuf;
+}
+
+void
afr_writev_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ local = frame->local; /* results are read from this frame's own local */
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL); /* no xdata is handed back */
+}
+
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
+ fop_frame = local->transaction.main_frame; /* NULL if it was already detached */
local->transaction.main_frame = NULL;
}
UNLOCK (&frame->lock);
- if (main_frame) {
- local->cont.writev.prebuf.ia_ino = local->cont.writev.ino;
- local->cont.writev.postbuf.ia_ino = local->cont.writev.ino;
+ return fop_frame; /* caller now owns the detached main frame (may be NULL) */
+}
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
+
+ fop_frame = afr_transaction_detach_fop_frame (frame); /* NULL when the main frame was already taken */
+
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame); /* results move from the transaction frame to the fop frame */
+ afr_writev_unwind (fop_frame, this);
}
return 0;
}
+/*
+ * Mark as failed every child whose successful writev wrote less than the
+ * best-case result already staged in local->op_ret.
+ */
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            i     = 0;
+
+        /*
+         * local->op_ret holds the best return value seen across the
+         * children.  A child that replied successfully but with a smaller
+         * value is out of sync and is marked as failed.  Children that
+         * replied with an error were already marked, and children without a
+         * valid reply are skipped.
+         */
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->replies[i].valid &&
+                    (local->replies[i].op_ret != -1) &&
+                    (local->replies[i].op_ret < local->op_ret))
+                        afr_transaction_fop_failed(frame, this, i);
+        }
+}
int
afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
-
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
int child_index = (long) cookie;
int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
+ priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -99,50 +193,100 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
}
- }
- local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
}
return 0;
}
-
int
afr_writev_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int i = 0;
int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -150,9 +294,31 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
+ }
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns out not to be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -161,13 +327,18 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->cont.writev.vector,
local->cont.writev.count,
local->cont.writev.offset,
- local->cont.writev.iobref);
+ local->cont.writev.flags,
+ local->cont.writev.iobref,
+ xdata);
if (!--call_count)
break;
}
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
@@ -193,24 +364,21 @@ afr_writev_done (call_frame_t *frame, xlator_t *this)
int
afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
-
- int op_ret = -1;
- int op_errno = 0;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
local = frame->local;
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
op_errno = ENOMEM;
goto out;
}
transaction_frame->local = local;
- frame->local = NULL;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local->op = GF_FOP_WRITE;
@@ -218,10 +386,17 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->transaction.fop = afr_writev_wind;
local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -230,79 +405,179 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
}
return 0;
}
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char *reason = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+ "fd: %p, inode: %p", fd,
+ fd ? fd->inode : NULL);
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, this->private, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ sh = &local->self_heal;
+ sh->do_metadata_self_heal = _gf_true;
+ if (fd->inode->ia_type == IA_IFREG)
+ sh->do_data_self_heal = _gf_true;
+ else if (fd->inode->ia_type == IA_IFDIR)
+ sh->do_entry_self_heal = _gf_true;
+
+ reason = "subvolume came online";
+ afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+ fd->inode->ia_type, reason, NULL, NULL);
+ return;
+out:
+ AFR_STACK_DESTROY (frame);
+}
+
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
+{
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ size_t need_open_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!afr_is_fd_fixable (fd))
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ LOCK (&fd->lock);
+ {
+ if (fd_ctx->up_count < priv->up_count) {
+ need_self_heal = _gf_true;
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+ }
+
+ need_open = alloca (priv->child_count * sizeof (*need_open));
+ for (i = 0; i < priv->child_count; i++) {
+ need_open[i] = 0;
+ if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+ continue;
+
+ if (!priv->child_up[i])
+ continue;
+
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+
+ need_open[i] = 1;
+ need_open_count++;
+ }
+ }
+ UNLOCK (&fd->lock);
+ if (ret)
+ goto out;
+
+ if (need_self_heal)
+ afr_trigger_open_fd_self_heal (fd, this);
+
+ if (!need_open_count)
+ goto out;
+
+ afr_fix_open (this, fd, need_open_count, need_open);
+out:
+ return;
+}
int
afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx = NULL;
-
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
- frame->local = local;
+ QUORUM_CHECK(writev,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.writev.vector = iov_dup (vector, count);
local->cont.writev.count = count;
local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
+ local->cont.writev.flags = flags;
local->cont.writev.iobref = iobref_ref (iobref);
local->fd = fd_ref (fd);
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- goto out;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ afr_open_fd_fix (fd, this);
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_writev;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_writev (frame, this);
- }
+ afr_do_writev (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -329,13 +604,11 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.truncate.prebuf.ia_ino = local->cont.truncate.ino;
- local->cont.truncate.postbuf.ia_ino = local->cont.truncate.ino;
-
AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -345,20 +618,16 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
LOCK (&frame->lock);
{
@@ -366,38 +635,22 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -410,14 +663,14 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -425,15 +678,17 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->truncate,
&local->loc,
- local->cont.truncate.offset);
+ local->cont.truncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -461,15 +716,12 @@ afr_truncate_done (call_frame_t *frame, xlator_t *this)
int
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -478,27 +730,22 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(truncate,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
local->transaction.fop = afr_truncate_wind;
local->transaction.done = afr_truncate_done;
@@ -507,17 +754,21 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
loc_copy (&local->loc, loc);
local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -546,13 +797,11 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.ftruncate.prebuf.ia_ino = local->cont.ftruncate.ino;
- local->cont.ftruncate.postbuf.ia_ino = local->cont.ftruncate.ino;
-
AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
}
@@ -561,20 +810,16 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
int
afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
int child_index = (long) cookie;
int call_count = -1;
- int need_unwind = 0;
int read_child = 0;
local = frame->local;
- priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -582,38 +827,22 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -626,14 +855,14 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -641,14 +870,17 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
+ local->fd,
+ local->cont.ftruncate.offset,
+ NULL);
if (!--call_count)
break;
@@ -679,7 +911,6 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
{
call_frame_t * transaction_frame = NULL;
afr_local_t * local = NULL;
-
int op_ret = -1;
int op_errno = 0;
@@ -687,8 +918,6 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
@@ -703,17 +932,22 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = local->cont.ftruncate.offset;
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
}
return 0;
@@ -722,61 +956,47 @@ out:
int
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx = NULL;
-
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
+ QUORUM_CHECK(ftruncate,out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
local->fd = fd_ref (fd);
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ afr_open_fd_fix (fd, this);
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_ftruncate;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_ftruncate (frame, this);
- }
+ afr_do_ftruncate (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -803,13 +1023,11 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.setattr.preop_buf.ia_ino = local->cont.setattr.ino;
- local->cont.setattr.postop_buf.ia_ino = local->cont.setattr.ino;
-
AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -819,11 +1037,10 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
-
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
@@ -832,7 +1049,7 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
LOCK (&frame->lock);
{
@@ -840,29 +1057,14 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -884,14 +1086,14 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -901,14 +1103,15 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
&local->loc,
&local->cont.setattr.in_buf,
- local->cont.setattr.valid);
+ local->cont.setattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -936,15 +1139,12 @@ afr_setattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -953,26 +1153,20 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(setattr,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
-
- local->cont.setattr.ino = loc->inode->ino;
local->cont.setattr.in_buf = *buf;
local->cont.setattr.valid = valid;
@@ -987,14 +1181,18 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -1019,15 +1217,11 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- local->cont.fsetattr.preop_buf.ia_ino =
- local->cont.fsetattr.ino;
- local->cont.fsetattr.postop_buf.ia_ino =
- local->cont.fsetattr.ino;
-
AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
return 0;
@@ -1037,11 +1231,10 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
int
afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
-
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
@@ -1050,7 +1243,7 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -1058,29 +1251,14 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1102,14 +1280,14 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1119,14 +1297,15 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsetattr,
local->fd,
&local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
+ local->cont.fsetattr.valid,
+ NULL);
if (!--call_count)
break;
@@ -1151,18 +1330,14 @@ afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -1171,26 +1346,25 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(fsetattr,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
- transaction_frame->local = local;
-
- local->op_ret = -1;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- local->cont.fsetattr.ino = fd->inode->ino;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.fsetattr.in_buf = *buf;
local->cont.fsetattr.valid = valid;
@@ -1201,18 +1375,24 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
+
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
}
return 0;
@@ -1240,39 +1420,34 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1292,16 +1467,16 @@ afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1311,14 +1486,15 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setxattr,
&local->loc,
local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.setxattr.flags,
+ NULL);
if (!--call_count)
break;
@@ -1332,7 +1508,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
int
afr_setxattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = frame->local;
local->transaction.unwind (frame, this);
@@ -1343,41 +1519,40 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this)
int
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- int ret = -1;
+ VALIDATE_OR_GOTO (this, out);
- int op_ret = -1;
- int op_errno = 0;
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
+ QUORUM_CHECK(setxattr,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- local->op_ret = -1;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
local->cont.setxattr.dict = dict_ref (dict);
local->cont.setxattr.flags = flags;
@@ -1392,14 +1567,211 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* {{{ fsetxattr */
+
+
+int
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
+
+
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetxattr,
+ local->fd,
+ local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(fsetxattr,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
+
+ local->transaction.fop = afr_fsetxattr_wind;
+ local->transaction.done = afr_fsetxattr_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
}
return 0;
@@ -1407,6 +1779,7 @@ out:
/* }}} */
+
/* {{{ removexattr */
@@ -1428,39 +1801,34 @@ afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ local->op_ret, local->op_errno,
+ NULL);
+ }
return 0;
}
int
afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1482,14 +1850,14 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1499,13 +1867,14 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->removexattr,
&local->loc,
- local->cont.removexattr.name);
+ local->cont.removexattr.name,
+ NULL);
if (!--call_count)
break;
@@ -1531,34 +1900,227 @@ afr_removexattr_done (call_frame_t *frame, xlator_t *this)
int
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(removexattr,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.removexattr.name = gf_strdup (name);
+
+ local->transaction.fop = afr_removexattr_wind;
+ local->transaction.done = afr_removexattr_done;
+ local->transaction.unwind = afr_removexattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* fremovexattr */
+int
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fremovexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
+
+
+int
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fremovexattr,
+ local->fd,
+ local->cont.removexattr.name,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
-
int ret = -1;
-
int op_ret = -1;
int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(fremovexattr, out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ ret = afr_local_init (local, priv, &op_errno);
if (ret < 0) {
op_errno = -ret;
goto out;
@@ -1570,25 +2132,730 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
local->cont.removexattr.name = gf_strdup (name);
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->transaction.fop = afr_fremovexattr_wind;
+ local->transaction.done = afr_fremovexattr_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fallocate,
+ local->fd,
+ local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_FALLOCATE;
+
+ local->transaction.fop = afr_fallocate_wind;
+ local->transaction.done = afr_fallocate_done;
+ local->transaction.unwind = afr_fallocate_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
+
+ /* fallocate can modify the file size */
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(fallocate,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_fallocate (frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ discard */
+
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->discard,
+ local->fd,
+ local->cont.discard.offset,
+ local->cont.discard.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.fop = afr_discard_wind;
+ local->transaction.done = afr_discard_done;
+ local->transaction.unwind = afr_discard_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(discard, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_discard(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+
+/* {{{ zerofill */
+
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
}
+ UNLOCK (&frame->lock);
+ if (main_frame) {
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.zerofill.prebuf,
+ &local->cont.zerofill.postbuf,
+ NULL);
+ }
return 0;
}
+
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind) {
+ local->transaction.unwind (frame, this);
+ }
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->zerofill,
+ local->fd,
+ local->cont.zerofill.offset,
+ local->cont.zerofill.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_ZEROFILL;
+
+ local->transaction.fop = afr_zerofill_wind;
+ local->transaction.done = afr_zerofill_done;
+ local->transaction.unwind = afr_zerofill_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this,
+ AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(zerofill, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ goto out;
+ }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_zerofill(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index 475898722..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 632c8b7c4..060d78f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
@@ -31,8 +22,69 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+/*
+ * Lock-tracing wrappers.
+ *
+ * Each macro reads the xlator's private state and calls the matching
+ * afr_trace_* function only when the corresponding trace flag
+ * (inodelk_trace or entrylk_trace) is set.
+ *
+ * The expansion is one statement WITHOUT a trailing semicolon, so the
+ * macros can be used as the sole body of an if/else branch; every call
+ * site supplies the terminating ';' itself.
+ */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_in (frame, this, params);     \
+        } while (0)
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->inodelk_trace)                      \
+                        break;                                  \
+                afr_trace_inodelk_out (frame, this, params);    \
+        } while (0)
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...)           \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_in (frame, this, params);     \
+        } while (0)
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...)          \
+        do {                                                    \
+                afr_private_t *_priv = (this)->private;         \
+                if (!_priv->entrylk_trace)                      \
+                        break;                                  \
+                afr_trace_entrylk_out (frame, this, params);    \
+        } while (0)
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+        /*
+         * Three-way comparator over afr_entry_lockee_t, usable with
+         * qsort()-style sorting.
+         *
+         * Ordering: first by the gfid that loc_gfid() yields for each
+         * lockee's loc, then by basename, where a NULL basename sorts
+         * before any non-NULL one.
+         *
+         * Returns -1, 0 or 1. Lockees with the same gfid and the same (or
+         * both NULL) basename compare as 0, so cmp(a, b) and cmp(b, a)
+         * never both report "less than".
+         */
+        const afr_entry_lockee_t *r1 = l1;
+        const afr_entry_lockee_t *r2 = l2;
+        int                       ret = 0;
+        uuid_t                    gfid1 = {0};
+        uuid_t                    gfid2 = {0};
+
+        /* NOTE(review): casts drop const, presumably because loc_gfid()
+         * takes a non-const loc_t * -- confirm against its prototype. */
+        loc_gfid ((loc_t*)&r1->loc, gfid1);
+        loc_gfid ((loc_t*)&r2->loc, gfid2);
+        ret = uuid_compare (gfid1, gfid2);
+
+        if (ret == 0) {
+                /*Entrylks with NULL basename are the 'smallest'*/
+                if (!r1->basename && !r2->basename)
+                        return 0;
+                if (!r1->basename)
+                        return -1;
+                if (!r2->basename)
+                        return 1;
+                ret = strcmp (r1->basename, r2->basename);
+        }
+
+        /* Normalise to exactly -1 / 0 / 1. */
+        if (ret < 0)
+                return -1;
+        if (ret > 0)
+                return 1;
+        return 0;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
static uint64_t afr_lock_number = 1;
@@ -57,14 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this)
}
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
{
- if (!frame->root->lk_owner) {
- gf_log (this->name, GF_LOG_TRACE,
- "Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)frame->root);
- frame->root->lk_owner = (uint64_t) (unsigned long)frame->root;
- }
+ gf_log (this->name, GF_LOG_TRACE,
+ "Setting lk-owner=%llu",
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
}
static int
@@ -90,29 +141,19 @@ is_afr_lock_selfheal (afr_local_t *local)
}
int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this,
- afr_fd_ctx_t *fd_ctx)
+internal_lock_count (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
int32_t call_count = 0;
int i = 0;
local = frame->local;
priv = this->private;
- if (fd_ctx) {
- GF_ASSERT (local->fd);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -120,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this,
static void
afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -168,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd,
}
snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -188,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
void
afr_print_entrylk (char *str, int size, const char *basename,
- uint64_t owner)
+ gf_lkowner_t *owner)
{
- snprintf (str, size, "Basename=%s, lk-owner=%llu",
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
basename ? basename : "<nul>",
- (unsigned long long)owner);
+ lkowner_utoa (owner));
}
static void
@@ -246,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int op_ret, int op_errno, int32_t child_index)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
@@ -274,45 +308,37 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
afr_print_verdict (op_ret, op_errno, verdict);
- gf_log (this->name, GF_LOG_NORMAL,
- "[%s %s] [%s] Lockee={%s} Number={%llu}",
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lockee,
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
(unsigned long long) int_lock->lock_number);
}
static void
-afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int32_t cmd, int32_t child_index)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
- if (!priv->inodelk_trace) {
- return;
- }
-
- afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner);
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
@@ -322,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -343,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_entrylk (lock, 256, basename, frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
- gf_log (this->name, GF_LOG_NORMAL,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
-afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename, int op_ret,
- int op_errno, int32_t child_index)
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -380,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
- gf_log (this->name, GF_LOG_NORMAL,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -446,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+                       loc_t *loc, char *basename, int child_count)
+{
+        /*
+         * Populate one entry lockee: copy 'loc' into it, duplicate
+         * 'basename' (or store NULL when none is given), reset the locked
+         * count and allocate a zeroed locked_nodes array with child_count
+         * elements.
+         *
+         * Returns 0 on success, -1 if either allocation fails. 'local' is
+         * not used here.
+         */
+        loc_copy (&lockee->loc, loc);
+
+        if (basename) {
+                lockee->basename = gf_strdup (basename);
+                if (!lockee->basename)
+                        return -1;
+        } else {
+                lockee->basename = NULL;
+        }
+
+        lockee->locked_count = 0;
+        lockee->locked_nodes = GF_CALLOC (child_count,
+                                          sizeof (*lockee->locked_nodes),
+                                          gf_afr_mt_afr_node_character);
+
+        return lockee->locked_nodes ? 0 : -1;
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+        /*
+         * Release what each of the first lockee_count lockees owns: wipe
+         * its loc and free its basename and locked_nodes array.
+         *
+         * Freed pointers are reset to NULL so that a repeated cleanup does
+         * not free them twice, and so that code testing locked_nodes for
+         * NULL (as initialize_entrylk_variables does) never sees a stale
+         * pointer.
+         */
+        int i = 0;
+
+        for (i = 0; i < int_lock->lockee_count; i++) {
+                loc_wipe (&int_lock->lockee[i].loc);
+
+                if (int_lock->lockee[i].basename) {
+                        GF_FREE (int_lock->lockee[i].basename);
+                        int_lock->lockee[i].basename = NULL;
+                }
+
+                if (int_lock->lockee[i].locked_nodes) {
+                        GF_FREE (int_lock->lockee[i].locked_nodes);
+                        int_lock->lockee[i].locked_nodes = NULL;
+                }
+        }
+
+        return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -463,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -476,20 +559,23 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
-
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
@@ -499,7 +585,7 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
{
int ret = 0;
- ret = strcmp (l1->path, l2->path);
+ ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
if (ret == 0)
ret = strcmp (b1, b2);
@@ -511,10 +597,22 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
}
int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+        /* Returns the sum of locked_count over the first lockee_count
+         * lockees of int_lock. */
+        int total = 0;
+        int n     = 0;
+
+        while (n < int_lock->lockee_count) {
+                total += int_lock->lockee[n].locked_count;
+                n++;
+        }
+
+        return total;
+}
+
+int
afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
{
- int i;
+ int i = 0;
int call_count = 0;
for (i = 0; i < child_count; i++) {
@@ -528,7 +626,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
/* FIXME: What if UNLOCK fails */
static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -554,18 +652,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ op_errno, child_index);
+
+ priv = this->private;
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_TRACE,
- "Unlock failed for some reason");
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
}
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
@@ -575,22 +692,30 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
- struct gf_flock flock;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
int call_count = 0;
int i = 0;
+ int piggyback = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -602,55 +727,107 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
goto out;
}
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
for (i = 0; i < priv->child_count; i++) {
- if (int_lock->inode_locked_nodes[i] & LOCKED_YES) {
- if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
+ continue;
- if (!--call_count)
- break;
+ if (local->fd) {
+ flock_use = &flock;
+ if (!local->transaction.eager_lock[i]) {
+ goto wind;
+ }
- } else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
+ piggyback = 0;
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_piggyback[i]) {
+ fd_ctx->lock_piggyback[i]--;
+ piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ if (piggyback) {
+ afr_unlock_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
if (!--call_count)
break;
-
+ continue;
}
- }
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
- }
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+ if (!--call_count)
+ break;
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
out:
return 0;
}
static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
+ }
+
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
}
@@ -658,25 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
-
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -686,18 +860,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
break;
@@ -711,18 +890,21 @@ out:
static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int done = 0;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
LOCK (&frame->lock);
{
@@ -731,16 +913,16 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* return ENOTSUP */
gf_log (this->name, GF_LOG_ERROR,
"subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
+ "please load features/locks xlator on server");
local->op_ret = op_ret;
int_lock->lock_op_ret = op_ret;
- done = 1;
}
- local->child_up[child_index] = 0;
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -749,11 +931,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -761,105 +949,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long)cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -867,6 +976,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -877,18 +987,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -896,42 +1004,78 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+                afr_transaction_type trans_type)
+{
+        /*
+         * Returns _gf_true when the lock in progress is an entry lock:
+         *  - a self-heal lock whose selfheal_lk_type is
+         *    AFR_ENTRY_SELF_HEAL_LK, or
+         *  - a transaction lock whose transaction type is
+         *    AFR_ENTRY_TRANSACTION or AFR_ENTRY_RENAME_TRANSACTION.
+         * Returns _gf_false in every other case.
+         */
+        if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK &&
+            int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK)
+                return _gf_true;
+
+        if (int_lock->transaction_lk_type != AFR_TRANSACTION_LK)
+                return _gf_false;
+
+        if (trans_type == AFR_ENTRY_TRANSACTION ||
+            trans_type == AFR_ENTRY_RENAME_TRANSACTION)
+                return _gf_true;
+
+        return _gf_false;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+        /* A lock request is needed for a child only when local->child_up
+         * has a non-zero entry for it. */
+        return local->child_up[child_index] ? _gf_true : _gf_false;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- struct gf_flock flock;
- uint64_t ctx;
+ struct gf_flock flock = {0,};
+ uint64_t ctx = 0;
int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
local->op_ret = -1;
int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
afr_copy_locked_nodes (frame, this);
@@ -939,48 +1083,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index]
- || !fd_ctx->opened_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EAGAIN;
- int_lock->lock_op_errno = EAGAIN;
-
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count ==
- afr_up_children_count (priv->child_count,
- local->child_up))) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -993,12 +1115,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1006,11 +1134,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1018,79 +1147,50 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, local->transaction.basename,
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
break;
}
return 0;
-
-
}
int32_t
@@ -1099,6 +1199,7 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int up_count = 0;
priv = this->private;
local = frame->local;
@@ -1112,6 +1213,11 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
+ up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1123,60 +1229,68 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
static int32_t
afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
int call_count = 0;
int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- local->child_up[child_index] = 0;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ call_count = --int_lock->lk_call_count;
}
+ UNLOCK (&frame->lock);
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last locking reply received");
- /* all locks successfull. Proceed to call FOP */
+ /* all locks successful. Proceed to call FOP */
if (int_lock->entrylk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1193,33 +1307,27 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
-
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
- uint64_t ctx;
- int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1228,16 +1336,16 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"fd not open on any subvolumes. aborting.");
afr_unlock (frame, this);
goto out;
@@ -1245,41 +1353,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this, NULL);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1289,59 +1408,75 @@ out:
int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
int call_count = 0;
int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- local->child_up[child_index] = 0;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
- }
-
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
- /* all locks successfull. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ /* all locks successful. Proceed to call FOP */
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
int_lock->lock_cbk (frame, this);
}
- /* Not all locks were successfull. Unlock and try locking
+ /* Not all locks were successful. Unlock and try locking
again, this time with serially blocking locks */
else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1359,31 +1494,36 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
-
- int32_t call_count = 0;
- uint64_t ctx = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1392,17 +1532,17 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"fd not open on any subvolumes. aborting.");
afr_unlock (frame, this);
goto out;
@@ -1411,261 +1551,77 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
-
- if (!--call_count)
- break;
+ if (!local->child_up[i])
+ continue;
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
+ goto wind;
}
- }
- } else {
- call_count = internal_lock_count (frame, this, NULL);
- int_lock->lk_call_count = call_count;
+ piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
+ afr_set_delayed_post_op (frame, this);
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_acquired[i]) {
+ fd_ctx->lock_piggyback[i]++;
+ piggyback = 1;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ if (piggyback) {
+ /* (op_ret == 1) => indicate piggybacked lock */
+ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
if (!--call_count)
break;
-
+ continue;
}
- }
- }
-
-out:
- return ret;
-}
-
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
-
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, flock_use, F_SETLK, i);
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
+ if (!--call_count)
+ break;
+ }
+ } else {
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLK, i);
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
-
}
}
-
out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *lower_name = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
+ return ret;
}
int32_t
@@ -1679,10 +1635,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -1709,6 +1663,11 @@ afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
goto out;
ret = fd_ctx_get (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to get the fd ctx");
+ goto out;
+ }
fdctx = (afr_fd_ctx_t *) (long) tmp;
GF_ASSERT (fdctx->locked_on);
@@ -1751,8 +1710,6 @@ __afr_save_locked_fd (xlator_t *this, fd_t *fd)
locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
gf_afr_mt_locked_fd);
if (!locked_fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
ret = -1;
goto out;
}
@@ -1784,7 +1741,7 @@ afr_save_locked_fd (xlator_t *this, fd_t *fd)
ret = __afr_save_locked_fd (this, fd);
if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"fd=%p could not be saved", fd);
goto unlock;
}
@@ -1835,12 +1792,10 @@ afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
for (i = 0; i < priv->child_count; i++) {
if (fdctx->locked_on[i]) {
gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d",
- i);
+ "Found lock recovery source=%d", i);
source_child = i;
break;
}
-
}
out:
@@ -1850,10 +1805,12 @@ out:
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock);
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
int32_t
afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -1864,7 +1821,7 @@ afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
if (op_ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"lock recovery failed");
goto cleanup;
}
@@ -1877,7 +1834,7 @@ afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
return 0;
@@ -1905,7 +1862,7 @@ afr_recover_lock (call_frame_t *frame, xlator_t *this,
(void *) (long) lock_recovery_child,
priv->children[lock_recovery_child],
priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock);
+ local->fd, F_SETLK, flock, NULL);
return 0;
}
@@ -1923,10 +1880,11 @@ is_afr_lock_eol (struct gf_flock *lock)
int32_t
afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
if (op_ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Failed to get locks on fd");
goto cleanup;
}
@@ -1935,7 +1893,7 @@ afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"Got a lock on fd");
if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Reached EOL on locks on fd");
goto cleanup;
}
@@ -1983,7 +1941,7 @@ afr_lock_recovery (call_frame_t *frame, xlator_t *this)
(void *) (long) source_child,
priv->children[source_child],
priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
+ local->fd, F_GETLK_FD, &flock, NULL);
out:
return ret;
@@ -2003,7 +1961,7 @@ afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- fdctx->opened_on[child_index] = 1;
+ fdctx->opened_on[child_index] = AFR_FD_OPENED;
out:
return ret;
@@ -2011,13 +1969,14 @@ out:
int32_t
afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
int32_t child_index = (long )cookie;
int ret = 0;
if (op_ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Reopen during lock-recovery failed");
goto cleanup;
}
@@ -2027,14 +1986,14 @@ afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this
ret = afr_lock_recovery (frame, this);
if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Lock recovery failed");
goto cleanup;
}
ret = afr_mark_fd_opened (this, fd, child_index);
if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Marking fd open failed");
goto cleanup;
}
@@ -2063,7 +2022,12 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
GF_ASSERT (local && local->fd);
ret = fd_ctx_get (local->fd, this, &tmp);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context of fd",
+ uuid_utoa (local->fd->inode->gfid));
fdctx = (afr_fd_ctx_t *) (long) tmp;
+ /* TODO: instead we should return from the function */
GF_ASSERT (fdctx);
child_index = local->lock_recovery_child;
@@ -2078,8 +2042,7 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
(void *)(long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd,
- fdctx->wbflags);
+ &loc, fdctx->flags, local->fd, NULL);
return 0;
}
@@ -2097,7 +2060,7 @@ is_fd_opened (fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- if (fdctx->opened_on[child_index])
+ if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
ret = 1;
out:
@@ -2107,13 +2070,14 @@ out:
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
afr_locked_fd_t *tmp = NULL;
- int ret = 0;
- struct list_head locks_list;
+ int ret = -1;
+ struct list_head locks_list = {0,};
+ int32_t op_errno = 0;
priv = this->private;
@@ -2123,25 +2087,14 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
frame = create_frame (this, this->ctx->pool);
if (!frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_afr_mt_afr_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Out of memory");
ret = -1;
goto out;
}
- AFR_LOCAL_INIT (local, priv);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
ret = -1;
goto out;
}
@@ -2179,5 +2132,43 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
}
out:
+ if ((ret < 0) && frame)
+ AFR_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 14064ebcd..73594f265 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,7 +17,6 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
@@ -42,6 +32,19 @@ enum gf_afr_mem_types_ {
gf_afr_mt_entry_name,
gf_afr_mt_pump_priv,
gf_afr_mt_locked_fd,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 5b96c4e34..643a5d692 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -55,16 +46,84 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+int
+afr_stale_child_up (afr_local_t *local, xlator_t *this)
+{ /* Return the index of the first child that is marked up in
+   * local->child_up but is not present in local->fresh_children, or -1
+   * when no such child is found. local->fresh_children is created here
+   * when it is not set yet, and is passed to afr_inode_get_read_ctx()
+   * together with local->fd->inode before the scan. */
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int up = -1;
+
+ priv = this->private;
+
+ if (!local->fresh_children)
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out; /* afr_children_create() returned NULL: report -1 */
+
+ afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
+ if (priv->child_count == afr_get_children_count (local->fresh_children,
+ priv->child_count))
+ goto out; /* every child is counted as fresh: nothing is stale */
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue; /* children that are down are not candidates */
+ if (afr_is_child_present (local->fresh_children,
+ priv->child_count, i))
+ continue; /* listed in fresh_children, so not stale */
+ up = i;
+ break;
+ }
+out:
+ return up;
+}
+
+void
+afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+{ /* For the inode behind frame->local->fd: when it is a regular file
+   * and afr_stale_child_up() reports a stale child, set the data,
+   * metadata, gfid and missing-entry self-heal flags and call
+   * afr_launch_self_heal() with a reason string naming that child.
+   * In every other case the function returns without doing anything. */
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ inode_t *inode = NULL;
+ int st_child = -1;
+ char reason[64] = {0};
+
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
+
+ if (!IA_ISREG (inode->ia_type))
+ goto out; /* only regular files are handled here */
+
+ st_child = afr_stale_child_up (local, this);
+ if (st_child < 0)
+ goto out; /* no stale child was found */
+
+ sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ sh->do_gfid_self_heal = _gf_true;
+ sh->do_missing_entry_self_heal = _gf_true;
+
+ snprintf (reason, sizeof (reason), "stale subvolume %d detected",
+ st_child);
+ afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
+ reason, NULL, NULL);
+out:
+ return;
+}
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{ /* Callback wired to the ftruncate wind issued from afr_open_cbk.
   * It unwinds the open fop with the op_ret/op_errno/fd stored in
   * frame->local; the op_ret/op_errno arguments of this callback are
   * not used. */
afr_local_t * local = frame->local;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this); /* runs before the unwind */
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, xdata);
return 0;
}
@@ -72,19 +131,15 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- int child_index = (long) cookie;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int ret = 0;
-
- int call_count = -1;
+ afr_local_t * local = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ afr_private_t *priv = NULL;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
@@ -97,33 +152,13 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->success_count++;
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p",
- fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
+ ret = afr_child_fd_ctx_set (this, fd, child_index,
+ local->cont.open.flags);
+ if (ret) {
+ local->op_ret = -1;
local->op_errno = -ret;
goto unlock;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- fd_ctx->flags = local->cont.open.flags;
- fd_ctx->wbflags = local->cont.open.wbflags;
}
}
unlock:
@@ -136,31 +171,30 @@ unlock:
&& (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
- fd, 0);
+ fd, 0, NULL);
} else {
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
}
return 0;
}
-
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int i = 0;
- int ret = -1;
-
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+ int32_t wind_flags = flags & (~O_TRUNC);
+ //We can't let truncation to happen outside transaction.
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -169,27 +203,29 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
priv = this->private;
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
+
if (afr_is_split_brain (this, loc->inode)) {
/* self-heal failed */
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to open as split brain seen, returning EIO");
op_errno = EIO;
goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
- frame->local = local;
call_count = local->call_count;
-
loc_copy (&local->loc, loc);
local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
local->fd = fd_ref (fd);
@@ -198,456 +234,149 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
+ loc, wind_flags, fd, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
return 0;
}
-
int
-afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{ /* Callback for the open/opendir winds issued by afr_fix_open; cookie
   * carries the child index. It logs the outcome, records
   * AFR_FD_OPENED or AFR_FD_NOT_OPENED in fd_ctx->opened_on[child_index]
   * under local->fd->lock, and destroys the frame once
   * afr_frame_return() reports 0. */
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int call_count = 0;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
priv = this->private;
local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
- }
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
}
-out:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- int_lock->lock_cbk = local->transaction.done;
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-static int
-__unopened_count (int child_count, unsigned int *opened_on, unsigned char *child_up)
-{
- int i;
- int count = 0;
-
- for (i = 0; i < child_count; i++) {
- if (!opened_on[i] && child_up[i])
- count++;
- }
-
- return count;
-}
-
-
-int
-afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int abandon = 0;
- int ret = 0;
- int i;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
-
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
-
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- abandon = 1;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get fd context, %p", local->fd);
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
LOCK (&local->fd->lock);
{
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- for (i = 0; i < priv->child_count; i++) {
- fd_ctx->pre_op_done[i] = 0;
- fd_ctx->pre_op_piggyback[i] = 0;
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
}
UNLOCK (&local->fd->lock);
-
- if (call_count == 0) {
- abandon = 1;
- goto out;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_openfd_sh_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, local->fd,
- fd_ctx->wbflags);
-
- if (!--call_count)
- break;
- }
- }
-
out:
- if (abandon)
- local->transaction.resume (frame, this);
-
- return 0;
-}
-
-
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
-{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
-
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
-}
-
-
-int
-afr_openfd_sh (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- char sh_type_str[256] = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (local->loc.path);
- /* forcibly trigger missing-entries self-heal */
-
- local->success_count = 1;
- local->enoent_count = 1;
-
- sh->data_lock_held = _gf_true;
- sh->need_data_self_heal = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_openfd_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_NORMAL, "%s self-heal triggered. "
- "path: %s, reason: Replicate up down flush, data lock is held",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame); /* last reply for this frame */
return 0;
}
-
-int
-afr_openfd_flush_done (call_frame_t *frame, xlator_t *this)
+/*
+ * Re-issue open/opendir for fd on the children flagged in need_open.
+ *
+ * this            - the AFR xlator; this->private supplies child_count and
+ *                   the children array.
+ * fd              - the fd to open; its inode gives the loc and decides
+ *                   between opendir (IA_IFDIR) and open.
+ * need_open_count - stored in local->call_count, i.e. the number of replies
+ *                   afr_openfd_fix_open_cbk waits for before destroying the
+ *                   frame. NOTE(review): presumably equals the number of
+ *                   non-zero entries in need_open - confirm against callers.
+ * need_open       - per-child flags; a wind is issued for every non-zero
+ *                   entry.
+ *
+ * Nothing is returned. On any setup failure the frame created here is
+ * destroyed and no wind is issued.
+ */
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- int _ret = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
- local = frame->local;
-
- LOCK (&local->fd->lock);
- {
- _ret = __fd_ctx_get (local->fd, this, &ctx);
-
- if (_ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->down_count = priv->down_count;
- fd_ctx->up_count = priv->up_count;
- }
-out:
- UNLOCK (&local->fd->lock);
-
- afr_local_transaction_cleanup (local, this);
-
- gf_log (this->name, GF_LOG_TRACE,
- "The up/down flush is over");
-
- fd_unref (local->fd);
- local->openfd_flush_cbk (frame, this);
-
- return 0;
-}
-
-
-
-int
-afr_openfd_xaction (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t * local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- local = frame->local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_openfd_sh;
- local->transaction.done = afr_openfd_flush_done;
-
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- gf_log (this->name, GF_LOG_TRACE,
- "doing up/down flush on fd=%p",
- fd);
-
- afr_transaction (frame, this, AFR_DATA_TRANSACTION);
-
-out:
- return 0;
-}
-
-
-int
-afr_openfd_xaction_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int ret = 0;
-
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
+ /* Nothing to do for an unfixable fd or an empty request. */
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
}
-out:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_openfd_xaction (frame, this, local->fd);
+ /* The winds below run on a frame of their own, created here. */
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
- return 0;
-}
-
-
-int
-afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t *fd_ctx;
-
- int no_open = 0;
- int ret = 0;
- int i;
- int call_count = 0;
-
- priv = this->private;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
-
- /*
- * If the file is already deleted while the fd is open, no need to
- * perform the openfd flush, call the flush_cbk and get out.
- */
- ret = afr_prepare_loc (frame, fd);
- if (ret < 0) {
- local->openfd_flush_cbk (frame, this);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
-
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
-
- local->fd = fd_ref (fd);
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- no_open = 1;
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
goto out;
- }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
- LOCK (&local->fd->lock);
- {
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- }
- UNLOCK (&local->fd->lock);
+ /* need_open_count is a size_t, so it is printed with %zu. */
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zu",
+ need_open_count);
- if (call_count == 0) {
- no_open = 1;
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
- local->call_count = call_count;
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
- for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, local->fd,
+ NULL);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for file %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (frame, afr_openfd_xaction_open_cbk,
+ /* O_TRUNC is masked out of the flags used for the re-open. */
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, fd,
- fd_ctx->wbflags);
-
- if (!--call_count)
- break;
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
- }
+ }
+ op_errno = 0;
+ ret = 0;
out:
- if (no_open)
- afr_openfd_xaction (frame, this, fd);
-
- return 0;
+ if (op_errno)
+ ret = -1; //For handling ALLOC_OR_GOTO
+ /* On failure no wind was issued, so the frame is released here. */
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 4bb70915e..83846f152 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -1,23 +1,15 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <openssl/md5.h>
#include "glusterfs.h"
#include "afr.h"
#include "xlator.h"
@@ -33,7 +25,6 @@
#include "compat-errno.h"
#include "compat.h"
#include "byte-order.h"
-#include "md5.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
@@ -44,306 +35,289 @@
This file contains the various self-heal algorithms
*/
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{ /* NULL-tolerant wrapper around AFR_STACK_DESTROY; always returns 0.
+   * The 'this' argument is not used in the body. */
+ if (!frame)
+ goto out;
-/*
- The "full" algorithm. Copies the entire file from
- source to sinks.
-*/
-
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
static void
-sh_full_private_cleanup (call_frame_t *frame, xlator_t *this)
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
{ /* Free the algorithm-private data hanging off
   * frame->local->self_heal.private. */
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
local = frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
-
- if (sh_priv)
- GF_FREE (sh_priv);
+ GF_FREE (sh_priv); /* sh->private itself is not reset in this function */
}
-
-static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call);
-
static int
-sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this)
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
{ /* Count the non-zero entries among the first child_count elements of
   * write_needed and return that count. */
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- sh_full_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_TRACE,
- "full self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "full self-heal completed on %s",
- local->loc.path);
+ int writes = 0;
+ int i = 0;
- local->self_heal.algo_completion_cbk (frame, this);
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
}
- return 0;
-}
-
-static int
-sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset)
-{
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- AFR_STACK_DESTROY (rw_frame);
- sh_full_loop_driver (sh_frame, this, _gf_false);
-
- return 0;
+ return writes;
}
static int
-sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
{ /* Finish the loop driver for sh_frame: copy the block counters out
   * of sh->private (when set), free it through sh_private_cleanup(),
   * then call algo_abort_cbk when is_self_heal_failed() is true or
   * algo_completion_cbk otherwise. Always returns 0. */
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ if (sh_priv) { /* read the counters before sh_priv is freed below */
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- rw_sh->offset - op_ret);
+ sh_private_cleanup (sh_frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-heal aborting on %s",
+ local->loc.path);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
+ local->self_heal.algo_abort_cbk (sh_frame, this);
+ } else {
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) { /* every block differed */
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
}
- }
- UNLOCK (&sh_frame->lock);
-
- call_count = afr_frame_return (rw_frame);
- if (call_count == 0) {
- sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret);
+ sh->old_loop_frame = last_loop_frame; /* stored before the completion callback runs */
+ local->self_heal.algo_completion_cbk (sh_frame, this);
}
return 0;
}
-
-static int
-sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
{ /* Dispose of one loop frame; a NULL loop_frame is a no-op. When the
   * frame's self-heal state has data_lock_held set, afr_sh_data_unlock()
   * is called with sh_destroy_frame as its last argument; otherwise the
   * frame is destroyed directly. Always returns 0. */
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
-
- off_t offset = (long) cookie;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- call_count = sh->active_sinks;
-
- rw_local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, offset);
+ if (!loop_frame)
+ goto out;
- if (op_ret <= 0) {
- sh->op_failed = 1;
- sh_full_loop_return (rw_frame, this, offset);
- return 0;
+ loop_local = loop_frame->local;
+ if (loop_local) { /* loop_sh stays NULL when the frame has no local */
+ loop_sh = &loop_local->self_heal;
}
- rw_sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
-
- sh_full_loop_return (rw_frame, this, offset);
- goto out;
- }
+ if (loop_sh && loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this, this->name,
+ sh_destroy_frame);
+ } else {
+ sh_destroy_frame (loop_frame, this);
}
+out:
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
+static int
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
+{ /* Passed to afr_sh_data_lock() by sh_loop_start as the first of its
+   * two callbacks. Finishes the previous loop frame, marks the data
+   * lock as held on this one and calls sh_data_algo_start. */
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (!--call_count)
- break;
- }
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL; /* handed to sh_loop_finish above */
-out:
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true; /* sh_loop_finish checks this flag */
+ loop_sh->sh_data_algo_start (loop_frame, this);
return 0;
}
-
static int
-sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
{ /* Passed to afr_sh_data_lock() by sh_loop_start as the second of its
   * two callbacks. Logs the failed range, finishes the previous loop
   * frame and reports -1/ENOTCONN through sh_loop_return(). */
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_frame_t *rw_frame = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL; /* handed to sh_loop_finish above */
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
+ return 0;
+}
- int32_t op_errno = 0;
+static int
+sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *old_loop_frame, call_frame_t **loop_frame)
+{ /* Build a new loop frame as a copy of sh_frame and seed its
+   * self-heal state from sh_frame's. On success the frame is stored in
+   * *loop_frame and 0 is returned; on any failure *loop_frame stays
+   * NULL, the partially built frame goes through sh_destroy_frame()
+   * and -ENOMEM is returned. */
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (sh_frame);
+ GF_ASSERT (loop_frame);
+
+ *loop_frame = NULL;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_owner as sh_frame
+ //so that locks translator allows conflicting locks
+ new_loop_local = afr_self_heal_local_init (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
- rw_frame = copy_frame (frame);
- if (!rw_frame)
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
+ gf_afr_mt_uint8_t); /* MD5_DIGEST_LENGTH bytes per child */
+ if (!new_loop_sh->checksum)
+ goto out;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ *loop_frame = new_loop_frame;
+ return 0;
+out:
+ sh_destroy_frame (new_loop_frame, this); /* accepts a NULL frame */
+ return -ENOMEM;
+}
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
+static int
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
+{ /* Create a loop frame for the block at 'offset' and request the
+   * data lock for [offset, block_size) through afr_sh_data_lock(),
+   * with sh_loop_lock_success / sh_loop_lock_failure as callbacks.
+   * When the frame cannot be created, the self-heal is marked failed,
+   * old_loop_frame is finished and sh_loop_return() is called with
+   * -1/ENOMEM. Always returns 0. */
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ int ret = 0;
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
+ GF_ASSERT (sh_frame);
- rw_sh->offset = offset;
- rw_sh->sh_frame = frame;
+ local = sh_frame->local;
+ sh = &local->self_heal;
- STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk,
- (void *) (long) offset,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- offset);
+ ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
+ &new_loop_frame);
+ if (ret)
+ goto out;
+ new_loop_local = new_loop_frame->local;
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
-
out:
- sh->op_failed = 1;
-
- sh_full_loop_driver (frame, this, _gf_false);
-
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM); /* new_loop_frame is NULL on this path */
return 0;
}
-
+/*
+ * Scheduler for the self-heal loops of one file.
+ *
+ * Under sh_priv->lock it accounts for a loop that just returned (every call
+ * except the first decrements loops_running) and reserves offsets for new
+ * loops while EOF has not been reached, the self-heal has not failed, fewer
+ * than priv->data_self_heal_window_size loops are running and
+ * sh_priv->offset is below sh->file_size. The first call may reserve
+ * several loops; later calls reserve at most one.
+ *
+ * Outside the lock the reserved loops are started with sh_loop_start().
+ * 'old_loop_frame' is the frame of the loop that just returned (may be
+ * NULL); it is handed to the first new loop, finished here when no loop is
+ * started, or passed to sh_loop_driver_done() when the driver is done.
+ *
+ * Always returns 0.
+ */
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call)
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- off_t offset = 0;
-
- int loop = 0;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ gf_boolean_t is_driver_done = _gf_false;
+ blksize_t block_size = 0;
+ int loop = 0;
+ off_t offset = 0;
+ afr_private_t *priv = NULL;
priv = this->private;
- local = frame->local;
+ local = sh_frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
LOCK (&sh_priv->lock);
{
- if (_gf_false == is_first_call)
+ if (!is_first_call)
sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((sh->op_failed == 0) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
+ /* remember the first reserved offset; the spawn loop below
+ * advances its own copy by block_size per started loop */
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh->block_size;
+ sh_priv->offset += block_size;
sh_priv->loops_running++;
- if (_gf_false == is_first_call)
+ /* a returning loop is replaced by at most one new loop */
+ if (!is_first_call)
break;
-
}
if (0 == sh_priv->loops_running) {
is_driver_done = _gf_true;
@@ -351,361 +325,242 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
}
UNLOCK (&sh_priv->lock);
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
while (loop--) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
// op failed in other loop, stop spawning more loops
- sh_full_loop_driver (frame, this, _gf_false);
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ /* re-enter as a "returned loop" so the reservation made
+ * above (loops_running++) is given back */
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
} else {
- sh_full_read_write (frame, this, offset);
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
offset += block_size;
}
}
+driver_done:
if (is_driver_done) {
- sh_full_loop_driver_done (frame, this);
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
}
-
- return 0;
-}
-
-
-int
-afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
-
- local->call_count = 0;
-
- sh_full_loop_driver (frame, this, _gf_true);
return 0;
}
-
-/*
- * The "diff" algorithm. Copies only those blocks whose checksums
- * don't match with those of source.
- */
-
-
-static void
-sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- int i;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
-
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
-
- GF_FREE (sh_priv->loops[i]);
- }
- }
-
- if (sh_priv) {
- if (sh_priv->loops)
- GF_FREE (sh_priv->loops);
-
- GF_FREE (sh_priv);
- }
-
-
-}
-
-
-static uint32_t
-__make_cookie (int loop_index, int child_index)
-{
- uint32_t ret = (loop_index << 16) | child_index;
- return ret;
-}
-
-
+/*
+ * Report the completion of one self-heal loop back to the driver.
+ *
+ * 'loop_frame' may be NULL (sh_loop_start() calls this without a frame when
+ * frame creation failed). When op_ret is -1 the self-heal is marked
+ * AFR_SELF_HEAL_FAILED, op_errno is recorded on the self-heal via
+ * afr_sh_set_error() and the loop frame is finished here; otherwise the
+ * loop frame is passed on to sh_loop_driver() as the "old" loop frame.
+ *
+ * Always returns 0.
+ */
static int
-__loop_index (uint32_t cookie)
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
{
- return (cookie & 0xFFFF0000) >> 16;
-}
-
-
-static int
-__child_index (uint32_t cookie)
-{
- return (cookie & 0x0000FFFF);
-}
-
-
-static void
-sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count)
-{
- loop_state->active = _gf_false;
-// loop_state->offset = 0;
-
- memset (loop_state->write_needed,
- 0, sizeof (*loop_state->write_needed) * child_count);
-
- memset (loop_state->checksum,
- 0, MD5_DIGEST_LEN * child_count);
-}
-
-
-static int
-sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
-static int
-sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
-
- sh_diff_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_TRACE,
- "diff self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "diff self-heal completed on %s",
- local->loc.path);
-
-
- gf_log (this->name, GF_LOG_NORMAL,
- "diff self-heal on %s: %d blocks of %d were different (%.2f%%)",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
-
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
- return 0;
-}
-
-static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state);
-
-static int
-sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this,
- struct sh_diff_loop_state *loop_state)
-{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", loop_state->offset);
+ /* the trace below is only emitted when a loop frame with a local
+ * is available */
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
- AFR_STACK_DESTROY (rw_frame);
+ if (op_ret == -1) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
- sh_diff_loop_driver (sh_frame, this, _gf_false, loop_state);
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
return 0;
}
-
+/*
+ * writev callback for one sink of a self-heal loop.
+ *
+ * 'cookie' carries the child index the write was wound to. A failed write
+ * (op_ret == -1) marks the self-heal AFR_SELF_HEAL_FAILED and records
+ * op_errno on the loop's self-heal state; a short write (fewer bytes than
+ * the iov_len of the saved request vector) also marks the self-heal failed.
+ * When the last outstanding write of the loop returns, the reference taken
+ * on the iobref in the read callback is dropped and the loop is reported
+ * via sh_loop_return().
+ *
+ * NOTE(review): only the iobref is released here; the iov_dup()'d request
+ * vector is presumably freed with the loop frame's local - confirm.
+ *
+ * Always returns 0.
+ */
static int
-sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_algo_diff_private_t *sh_priv;
- struct sh_diff_loop_state *loop_state;
-
- int call_count = 0;
- int child_index = 0;
- int loop_index = 0;
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
gf_log (this->name, GF_LOG_TRACE,
"wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- loop_state->offset);
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
- sh->op_failed = 1;
- }
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (loop_sh, op_errno);
+ } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
+ /* iov_len is a size_t: cast it so the argument really matches
+ * the %lu conversion on every platform */
+ gf_log (this->name, GF_LOG_ERROR,
+ "incomplete write to %s on subvolume %s "
+ "(expected %lu, returned %d)", sh_local->loc.path,
+ priv->children[child_index]->name,
+ (unsigned long) loop_local->cont.writev.vector->iov_len,
+ op_ret);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
- UNLOCK (&sh_frame->lock);
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ iobref_unref(loop_local->cont.writev.iobref);
+
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
}
return 0;
}
-
-static int
-sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+/*
+ * Clear write_needed[] for sinks that do not need the current block.
+ *
+ * Does nothing when the active algorithm is named "diff". Otherwise, for
+ * every child still flagged in loop_sh->write_needed, the flag is cleared
+ * when the loop's offset lies at or beyond that child's size as recorded in
+ * sh->buf[i].ia_size. The visible caller (sh_loop_read_cbk) invokes this
+ * only when the loop is marked file_has_holes and iov_0filled() returned 0
+ * for the data just read.
+ */
+static void
+sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
+ afr_private_t *priv)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
- int loop_index;
- struct sh_diff_loop_state *loop_state;
+ if (!strcmp (sh->algo->name, "diff"))
+ return;
- uint32_t wcookie;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- int i = 0;
- int call_count = 0;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ /* full self-heal guarantees there exists at least 1 file with size 0.
+ * That means for other files we can preserve holes that come after
+ * its size before 'trim'
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->write_needed[i] &&
+ ((loop_sh->offset + 1) > sh->buf[i].ia_size))
+ loop_sh->write_needed[i] = 0;
+ }
+}
- sh_frame = rw_sh->sh_frame;
+/*
+ * readv callback for the source read of one self-heal loop.
+ *
+ * op_ret < 0 marks the self-heal AFR_SELF_HEAL_FAILED; op_ret == 0 sets
+ * sh->eof_reached. In both cases the loop is reported via sh_loop_return()
+ * with the callback's op_ret/op_errno.
+ *
+ * Otherwise the set of sinks to write is pruned (only for loops marked
+ * file_has_holes whose data makes iov_0filled() return 0), the request
+ * vector is duplicated and the iobref referenced so sh_loop_write_cbk can
+ * inspect the request, and a writev is wound to every child still flagged
+ * in write_needed, with the child index as cookie. If no child needs a
+ * write the loop returns successfully right away.
+ */
+static int
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- call_count = sh_diff_number_of_writes_needed (loop_state->write_needed,
- priv->child_count);
-
- rw_local->call_count = call_count;
gf_log (this->name, GF_LOG_TRACE,
"read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, loop_state->offset);
+ op_ret, loop_local->loc.path, loop_sh->offset);
- if ((op_ret <= 0) ||
- (call_count == 0)) {
- sh_diff_loop_return (rw_frame, this, loop_state);
-
- return 0;
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ /* report the error delivered with this callback, not
+ * whatever the thread-global errno happens to hold */
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (op_errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
}
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
+ sh_prune_writes_needed (sh_frame, loop_frame, priv);
- sh_diff_loop_return (rw_frame, this, loop_state);
- goto out;
- }
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ if (call_count == 0) {
+ sh_loop_return (sh_frame, this, loop_frame, 0, 0);
+ goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (loop_state->write_needed[i]) {
- wcookie = __make_cookie (loop_index, i);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk,
- (void *) (long) wcookie,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count,
- loop_state->offset, iobref);
+ /*
+ * We only really need the request size at the moment, but the buffer
+ * is required if we want to issue a retry in the event of a short write.
+ * Therefore, we duplicate the vector and ref the iobref here...
+ */
+ loop_local->cont.writev.vector = iov_dup(vector, count);
+ if (!loop_local->cont.writev.vector) {
+ /* sh_loop_write_cbk dereferences the saved vector, so do not
+ * wind any write without it; fail the loop like a read error */
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOMEM);
+ goto out;
+ }
+ loop_local->cont.writev.iobref = iobref_ref(iobref);
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, 0, iobref, NULL);
+
+ if (!--call_count)
+ break;
}
out:
@@ -714,119 +569,90 @@ out:
+/*
+ * Wind a readv for this loop's block to the source child.
+ *
+ * Source index, healing fd, block size and offset are all taken from the
+ * loop frame's own self-heal state; the source index is also passed as the
+ * cookie and sh_loop_read_cbk receives the reply.
+ *
+ * Always returns 0.
+ */
static int
-sh_diff_read (call_frame_t *rw_frame, xlator_t *this,
- int loop_index)
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- struct sh_diff_loop_state *loop_state;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- uint32_t cookie;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- loop_state = sh_priv->loops[loop_index];
-
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh_priv->block_size,
- loop_state->offset);
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset, 0, NULL);
return 0;
}
+/*
+ * rchecksum callback of the "diff" algorithm; 'cookie' is the child index.
+ *
+ * A failed checksum marks the self-heal AFR_SELF_HEAL_FAILED; a successful
+ * one stores the strong checksum in this child's MD5_DIGEST_LENGTH-sized
+ * slot of loop_sh->checksum. Once the last reply is in, every child that is
+ * up and not a source is compared against the source's checksum and flagged
+ * in loop_sh->write_needed when it differs. The loop then either reads the
+ * block from the source (something differs and the self-heal has not
+ * failed) or returns.
+ *
+ * NOTE(review): part of this body is elided by a hunk boundary in this
+ * view. Also, the op_ret/op_errno passed to sh_loop_return() are those of
+ * whichever reply arrived last - confirm that is intended.
+ *
+ * Always returns 0.
+ */
static int
-sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_algo_diff_private_t * sh_priv = NULL;
-
- int loop_index = 0;
- int child_index = 0;
- struct sh_diff_loop_state *loop_state;
-
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+ int write_needed = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
-
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
-
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
- memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum,
- MD5_DIGEST_LEN);
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
+ strong_checksum, MD5_DIGEST_LENGTH);
}
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
for (i = 0; i < priv->child_count; i++) {
if (sh->sources[i] || !sh_local->child_up[i])
continue;
- if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN),
- loop_state->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
+ MD5_DIGEST_LENGTH)) {
/*
Checksums differ, so this block
must be written to this sink
*/
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"checksum on subvolume %s at offset %"
PRId64" differs from that on source",
- priv->children[i]->name, loop_state->offset);
+ priv->children[i]->name, loop_sh->offset);
- write_needed = loop_state->write_needed[i] = 1;
+ write_needed = loop_sh->write_needed[i] = 1;
}
}
@@ -838,250 +664,171 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
}
UNLOCK (&sh_priv->lock);
- if (write_needed && !sh->op_failed) {
- sh_diff_read (rw_frame, this, loop_index);
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh_loop_read (loop_frame, this);
} else {
- sh->offset += sh_priv->block_size;
-
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
}
}
return 0;
}
-
-static int
-sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max)
-{
- int i;
-
- LOCK (&sh_priv->lock);
- {
- for (i = 0; i < max; i++) {
- if (sh_priv->loops[i]->active == _gf_false) {
- sh_priv->loops[i]->active = _gf_true;
- break;
- }
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (i == max) {
- gf_log ("[sh-diff]", GF_LOG_ERROR,
- "no free loops found! This shouldn't happen. Please"
- " report this to gluster-devel@nongnu.org");
- }
-
- return i;
-}
-
-
+/*
+ * Per-loop start function of the "diff" algorithm.
+ *
+ * Winds an rchecksum for this loop's offset/block size to the source child
+ * and to every child that is up and not a source, using the child index as
+ * cookie; sh_diff_checksum_cbk collects the replies. The expected number of
+ * replies is set to loop_sh->active_sinks + 1 (sinks and source).
+ *
+ * NOTE(review): this assumes the loop frame's self-heal state carries valid
+ * sources[] and child_up[] arrays - confirm against sh_loop_frame_create().
+ *
+ * Always returns 0.
+ */
static int
-sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_self_heal_t * rw_sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int call_count = 0;
+ int i = 0;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
-
- call_frame_t *rw_frame = NULL;
-
- uint32_t cookie;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
-
- int32_t op_errno = 0;
-
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- call_count = sh->active_sinks + 1; /* sinks and source */
-
- rw_local->call_count = call_count;
-
- loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size);
-
- loop_state = sh_priv->loops[loop_index];
- loop_state->offset = offset;
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- /* we need to send both the loop index and child index,
- so squeeze them both into a 32-bit number */
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
- cookie = __make_cookie (loop_index, sh->source);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
continue;
- cookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
if (!--call_count)
break;
}
return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_diff_loop_driver (frame, this, _gf_false, loop_state);
-
- return 0;
}
-
+/*
+ * Per-loop start function of the "full" algorithm.
+ *
+ * Flags every child that is up and not a source in loop_sh->write_needed
+ * and then reads the block from the source via sh_loop_read().
+ *
+ * Always returns 0.
+ */
static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state)
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
-
- int loop = 0;
-
- off_t offset = 0;
- char sh_type_str[256] = {0,};
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str));
-
- LOCK (&sh_priv->lock);
- {
- if (loop_state)
- sh_diff_loop_state_reset (loop_state, priv->child_count);
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh_priv->block_size;
- while ((0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh_priv->block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- sh_diff_loop_driver (frame, this, _gf_false, NULL);
- } else {
- sh_diff_checksum (frame, this, offset);
- offset += block_size;
- }
- }
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (is_driver_done) {
- sh_diff_loop_driver_done (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
}
+ sh_loop_read (loop_frame, this);
return 0;
}
-
-int
-afr_sh_algo_diff (call_frame_t *frame, xlator_t *this)
+/*
+ * Allocate the zero-filled private state shared by the loops of one data
+ * self-heal and initialise its lock.
+ *
+ * Returns the new afr_sh_algo_private_t, or NULL when the allocation
+ * failed.
+ */
+afr_sh_algo_private_t*
+afr_sh_priv_init (void)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
-
- int i;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ afr_sh_algo_private_t *sh_priv = NULL;
sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
gf_afr_mt_afr_private_t);
-
- sh_priv->block_size = this->ctx->page_size;
-
- sh->private = sh_priv;
+ if (!sh_priv)
+ goto out;
LOCK_INIT (&sh_priv->lock);
+out:
+ return sh_priv;
+}
- local->call_count = 0;
+/*
+ * Move the data lock from frame 'src' to frame 'dst'.
+ *
+ * 'src' is asserted to hold the data lock and 'dst' not to. The transfer
+ * itself is done by afr_lk_transfer_datalock(); the data_lock_held flags of
+ * the two frames are swapped only when it succeeds.
+ *
+ * Returns 0 on success, otherwise the non-zero result of
+ * afr_lk_transfer_datalock().
+ */
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *to_local = dst->local;
+ afr_local_t *from_local = src->local;
+ afr_self_heal_t *to_sh = &to_local->self_heal;
+ afr_self_heal_t *from_sh = &from_local->self_heal;
+ int ret = -1;
+
+ GF_ASSERT (from_sh->data_lock_held);
+ GF_ASSERT (!to_sh->data_lock_held);
+
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret == 0) {
+ /* ownership moved: flip the bookkeeping on both frames */
+ from_sh->data_lock_held = _gf_false;
+ to_sh->data_lock_held = _gf_true;
+ }
+ return ret;
+}
- sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size,
- sizeof (*sh_priv->loops),
- gf_afr_mt_sh_diff_loop_state);
+/*
+ * Common entry point of the data self-heal algorithms.
+ *
+ * Records 'sh_data_algo_start' as the per-loop start function, creates the
+ * first loop frame, transfers the data lock from 'sh_frame' to it,
+ * allocates the shared private state and kicks off sh_loop_driver() as the
+ * first call. On any failure the self-heal is marked AFR_SELF_HEAL_FAILED
+ * and sh_loop_driver_done() is invoked instead.
+ *
+ * NOTE(review): when afr_sh_transfer_lock() or afr_sh_priv_init() fails,
+ * 'first_loop_frame' has already been created and is not released in this
+ * function - confirm whether it leaks. sh_loop_driver_done() may also run
+ * with sh->private still NULL on these paths - confirm it tolerates that.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
+{
+ call_frame_t *first_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]),
- gf_afr_mt_sh_diff_loop_state);
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count,
- MD5_DIGEST_LEN, gf_afr_mt_uint8_t);
- sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*sh_priv->loops[i]->write_needed),
- gf_afr_mt_char);
+ sh->sh_data_algo_start = sh_data_algo_start;
+ local->call_count = 0;
+ ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
+ if (ret)
+ goto out;
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
+ sh->private = afr_sh_priv_init ();
+ if (!sh->private) {
+ ret = -1;
+ goto out;
}
+ sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
+ ret = 0;
+out:
+ if (ret) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ sh_loop_driver_done (sh_frame, this, NULL);
+ }
+ return 0;
+}
- sh_diff_loop_driver (frame, this, _gf_true, NULL);
-
+/*
+ * "diff" data self-heal: start the loops with sh_diff_checksum as the
+ * per-loop start function. Always returns 0.
+ */
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
return 0;
}
+/*
+ * "full" data self-heal: start the loops with sh_full_read_write_to_sinks
+ * as the per-loop start function. Always returns 0.
+ */
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_algo_fn full_algo_start = sh_full_read_write_to_sinks;
+
+ (void) afr_sh_start_loops (sh_frame, this, full_algo_start);
+ return 0;
+}
struct afr_sh_algorithm afr_self_heal_algorithms[] = {
{.name = "full", .fn = afr_sh_algo_full},
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
index e45621b0e..6b20789b1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -1,26 +1,16 @@
/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
#define __AFR_SELF_HEAL_ALGORITHM_H__
-
+/* Signature of a self-heal algorithm callback: it receives a call frame
+ * and the xlator and returns an int. */
typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
xlator_t *this);
@@ -30,31 +20,13 @@ struct afr_sh_algorithm {
};
extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-
+/*
+ * Private state shared by all loops of one data self-heal.
+ *
+ * lock          - taken by the loop driver around loops_running and offset
+ * loops_running - number of loops currently reserved or in flight
+ * offset        - offset that the next new loop will be given
+ * total_blocks,
+ * diff_blocks   - block counters; presumably total blocks examined and
+ *                 blocks found different by the "diff" algorithm (their
+ *                 updates are not visible in this part of the patch)
+ */
typedef struct {
gf_lock_t lock;
unsigned int loops_running;
off_t offset;
-} afr_sh_algo_full_private_t;
-
-struct sh_diff_loop_state {
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- gf_boolean_t active;
-};
-
-typedef struct {
- size_t block_size;
-
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
int32_t total_blocks;
int32_t diff_blocks;
-
- struct sh_diff_loop_state **loops;
-} afr_sh_algo_diff_private_t;
+} afr_sh_algo_private_t;
#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 4cf09a656..ef92b4205 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "glusterfs.h"
@@ -27,6 +18,67 @@
#include "afr-self-heal.h"
#include "pump.h"
+/*
+ * Append " <sh_str> self heal <status-string>," to the character array
+ * `msg` at offset `off` whenever `status` is anything other than
+ * AFR_SELF_HEAL_NOT_ATTEMPTED, and set `print_log` to 1 in that case.
+ *
+ * `msg` must be a real array: sizeof (msg) is used as its capacity.
+ * snprintf() returns the length the output would have had, so `off` is
+ * clamped after a truncated write; without that, the next
+ * "sizeof (msg) - off" would wrap around and remove the bound.
+ */
+#define ADD_FMT_STRING(msg, off, sh_str, status, print_log)                 \
+        do {                                                                \
+                if (AFR_SELF_HEAL_NOT_ATTEMPTED != (status)) {              \
+                        if ((size_t)(off) < sizeof (msg)) {                 \
+                                (off) += snprintf ((msg) + (off),           \
+                                        sizeof (msg) - (off),               \
+                                        " "sh_str" self heal %s,",          \
+                                        get_sh_completion_status (status)); \
+                                if ((size_t)(off) >= sizeof (msg))          \
+                                        (off) = sizeof (msg) - 1;           \
+                        }                                                   \
+                        (print_log) = 1;                                    \
+                }                                                           \
+        } while (0)
+
+/*
+ * Append " <sh_str> self heal <status-string>," to the character array
+ * `msg` at offset `off`, but only when `status` is
+ * AFR_SELF_HEAL_SYNC_BEGIN or AFR_SELF_HEAL_FAILED; `print_log` is set
+ * to 1 in that case.
+ *
+ * `msg` must be a real array: sizeof (msg) is used as its capacity.
+ * snprintf() returns the length the output would have had, so `off` is
+ * clamped after a truncated write; without that, the next
+ * "sizeof (msg) - off" would wrap around and remove the bound.
+ */
+#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log)            \
+        do {                                                                \
+                if (AFR_SELF_HEAL_SYNC_BEGIN == (status) ||                 \
+                    AFR_SELF_HEAL_FAILED == (status)) {                     \
+                        if ((size_t)(off) < sizeof (msg)) {                 \
+                                (off) += snprintf ((msg) + (off),           \
+                                        sizeof (msg) - (off),               \
+                                        " "sh_str" self heal %s,",          \
+                                        get_sh_completion_status (status)); \
+                                if ((size_t)(off) >= sizeof (msg))          \
+                                        (off) = sizeof (msg) - 1;           \
+                        }                                                   \
+                        (print_log) = 1;                                    \
+                }                                                           \
+        } while (0)
+
+
+/*
+ * Return the self-heal state kept in frame->local to a clean slate:
+ * zero the per-child child_errno, buf, parentbufs, success and
+ * locked_nodes arrays (priv->child_count entries each), clear
+ * active_sinks and reset sh->xattr through afr_reset_xattr().
+ */
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_private_t   *priv  = this->private;
+        afr_self_heal_t *sh    = &local->self_heal;
+
+        memset (sh->child_errno, 0,
+                priv->child_count * sizeof (*sh->child_errno));
+        memset (sh->buf, 0, priv->child_count * sizeof (*sh->buf));
+        memset (sh->parentbufs, 0,
+                priv->child_count * sizeof (*sh->parentbufs));
+        memset (sh->success, 0, priv->child_count * sizeof (*sh->success));
+        memset (sh->locked_nodes, 0,
+                priv->child_count * sizeof (*sh->locked_nodes));
+
+        sh->active_sinks = 0;
+        afr_reset_xattr (sh->xattr, priv->child_count);
+}
+
+/*
+ * Compute the intersection of two child sets.
+ *
+ * For every child index below child_count, intersection[child] is set to
+ * 1 when afr_is_child_present() reports the child in both set1 and set2,
+ * and to 0 otherwise. Every slot is written, so the caller does not need
+ * to clear `intersection` beforehand.
+ */
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+                               int *intersection, unsigned int child_count)
+{
+        /* same signedness as child_count, so the loop bound compares cleanly */
+        unsigned int child = 0;
+
+        for (child = 0; child < child_count; child++) {
+                intersection[child] =
+                        afr_is_child_present (set1, child_count, child)
+                        && afr_is_child_present (set2, child_count, child);
+        }
+}
+
/**
* select_source - select a source and return it
*/
@@ -34,7 +86,7 @@
int
afr_sh_select_source (int sources[], int child_count)
{
- int i;
+ int i = 0;
for (i = 0; i < child_count; i++)
if (sources[i])
return i;
@@ -42,26 +94,34 @@ afr_sh_select_source (int sources[], int child_count)
return -1;
}
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int active_sinks = 0;
-/**
- * sink_count - return number of sinks in sources array
- */
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
-int
-afr_sh_sink_count (int sources[], int child_count)
-{
- int i;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] == 0 && local->child_up[i] == 1) {
+ active_sinks++;
+ sh->success[i] = 1;
+ } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
+ sh->success[i] = 1;
+ }
+ }
+ sh->active_sinks = active_sinks;
}
int
afr_sh_source_count (int sources[], int child_count)
{
- int i;
+ int i = 0;
int nsource = 0;
for (i = 0; i < child_count; i++)
@@ -70,32 +130,22 @@ afr_sh_source_count (int sources[], int child_count)
return nsource;
}
-
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count)
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (child_errno[i] && sources[i]) {
- sources[i] = 0;
- }
- }
-
- return 0;
+ sh->op_ret = -1;
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
}
-
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
{
- afr_private_t * priv = this->private;
-
- char *buf = NULL;
- char *ptr = NULL;
-
- int i, j;
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
/* 10 digits per entry + 1 space + '[' and ']' */
buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
@@ -107,43 +157,142 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
}
sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
- "pending_matrix: %s", buf);
+ gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
}
GF_FREE (buf);
}
+/*
+ * Render the child_count x child_count pending matrix as a newly
+ * allocated string of the form
+ *
+ *     - Pending matrix: [ [ a b ] [ c d ] ]
+ *
+ * Returns NULL when the allocation fails. Otherwise the buffer comes from
+ * GF_CALLOC() and the caller releases it with GF_FREE().
+ */
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+{
+        afr_private_t *priv         = this->private;
+        const char    *msg          = "- Pending matrix: ";
+        const char    *matrix_begin = "[ [ ";
+        const char    *matrix_end   = "] ]";
+        const char    *separator    = "] [ ";
+        /* "-2147483648" is 11 characters, plus the space after the entry */
+        const size_t   entry_strlen = 12;
+        size_t         child_count  = 0;
+        size_t         buf_len      = 0;
+        size_t         off          = 0;
+        size_t         i            = 0;
+        size_t         j            = 0;
+        char          *buf          = NULL;
+
+        /* a non-positive count yields an empty matrix, not a short buffer */
+        if (priv->child_count > 0)
+                child_count = priv->child_count;
+
+        buf_len = strlen (msg) + strlen (matrix_begin) + strlen (matrix_end)
+                  + (child_count * child_count * entry_strlen)
+                  + 1; /* terminating NUL */
+        if (child_count > 1)
+                buf_len += (child_count - 1) * strlen (separator);
+
+        buf = GF_CALLOC (1, buf_len, gf_afr_mt_char);
+        if (!buf)
+                goto out;
+
+        /*
+         * buf_len is an upper bound on the rendered length, so none of the
+         * writes below can truncate and `off` always stays below buf_len;
+         * snprintf() is used so each write is bounded regardless.
+         */
+        off += snprintf (buf + off, buf_len - off, "%s%s", msg, matrix_begin);
+        for (i = 0; i < child_count; i++) {
+                for (j = 0; j < child_count; j++) {
+                        off += snprintf (buf + off, buf_len - off, "%d ",
+                                         pending_matrix[i][j]);
+                }
+                if (i + 1 < child_count)
+                        off += snprintf (buf + off, buf_len - off, "%s",
+                                         separator);
+        }
+        snprintf (buf + off, buf_len - off, "%s", matrix_end);
+
+out:
+        return buf;
+}
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc)
{
- int i, j, k;
+ char *buf = NULL;
+ char *free_ptr = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
- int ret = -1;
+ buf = afr_get_pending_matrix_str (pending_matrix, this);
+ if (buf)
+ free_ptr = buf;
+ else
+ buf = "";
- unsigned char *ignorant_subvols = NULL;
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
+ gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
+ " (possible split-brain). Please delete the file from all but "
+ "the preferred subvolume.%s", loc, buf);
+ GF_FREE (free_ptr);
+ return;
+}
+
+
+void
+afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+{
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (pending_matrix);
- /* start clean */
for (i = 0; i < child_count; i++) {
for (j = 0; j < child_count; j++) {
pending_matrix[i][j] = 0;
}
}
+}
+
+/*
+ * Make every non-ignorant subvolume point at each ignorant one: for each
+ * child flagged in ignorant_subvols, the matrix entry
+ * pending_matrix[other][child] is incremented for every child `other`
+ * that is not flagged.
+ */
+void
+afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
+                                      unsigned char *ignorant_subvols,
+                                      size_t child_count)
+{
+        int accused = 0;
+        int accuser = 0;
+
+        GF_ASSERT (pending_matrix);
+        GF_ASSERT (ignorant_subvols);
+
+        for (accused = 0; accused < child_count; accused++) {
+                if (!ignorant_subvols[accused])
+                        continue;
+
+                for (accuser = 0; accuser < child_count; accuser++) {
+                        if (ignorant_subvols[accuser])
+                                continue;
+                        pending_matrix[accuser][accused] += 1;
+                }
+        }
+}
+
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count)
+{
+ /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
+ int32_t pending[3] = {0,};
+ void *pending_raw = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+
+ afr_init_pending_matrix (pending_matrix, child_count);
for (i = 0; i < child_count; i++) {
pending_raw = NULL;
for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
+ ret = dict_get_ptr (xattr[i], pending_key[j],
&pending_raw);
if (ret != 0) {
@@ -153,7 +302,8 @@ afr_sh_build_pending_matrix (afr_private_t *priv,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
@@ -164,52 +314,14 @@ afr_sh_build_pending_matrix (afr_private_t *priv,
}
}
- /*
- * Make all non-ignorant subvols point towards the ignorant
- * subvolumes.
- */
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
-
- GF_FREE (ignorant_subvols);
+ return ret;
}
-
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
- AFR_NODE_WISE
+ AFR_NODE_WISE,
} afr_node_type;
typedef struct {
@@ -289,7 +401,7 @@ afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
* It is 1 if no other wise node accuses it.
* Only wise nodes with wisdom 1 are sources.
*
- * If no nodes with wisdom 1 exist, a split-brain has occured.
+ * If no nodes with wisdom 1 exist, a split-brain has occurred.
*/
static void
@@ -342,8 +454,7 @@ afr_sh_mark_wisest_as_sources (int sources[],
int child_count)
{
int nsources = 0;
-
- int i = 0;
+ int i = 0;
for (i = 0; i < child_count; i++) {
if (characters[i].wisdom == 1) {
@@ -355,284 +466,669 @@ afr_sh_mark_wisest_as_sources (int sources[],
return nsources;
}
-
-static int
-afr_sh_mark_if_size_differs (afr_self_heal_t *sh, int child_count)
+static void
+afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t child_count)
{
- int32_t ** pending_matrix;
- int i, j;
+ int i = 0;
+ int j = 0;
+ int witness = 0;
- int size_differs = 0;
-
- pending_matrix = sh->pending_matrix;
+ GF_ASSERT (witnesses);
+ GF_ASSERT (pending_matrix);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
for (i = 0; i < child_count; i++) {
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
+
+ witness = 0;
for (j = 0; j < child_count; j++) {
- if (!sh->buf)
- break;
+ if (i == j)
+ continue;
+ witness += pending_matrix[i][j];
+ }
+ witnesses[i] = witness;
+ }
+}
+
+static int32_t
+afr_find_biggest_witness_among_fools (int32_t *witnesses,
+ afr_node_character *characters,
+ int32_t child_count)
+{
+ int i = 0;
+ int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[j])
- && (pending_matrix[i][j] == 0)
- && (pending_matrix[j][i] == 0)) {
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
- pending_matrix[i][j] = 1;
- pending_matrix[j][i] = 1;
+ for (i = 0; i < child_count; i++) {
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
- size_differs = 1;
- }
- }
+ if (biggest_witness < witnesses[i]) {
+ biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
- return size_differs;
-}
+ if (biggest_witness_cnt != 1)
+ return -1;
+ return biggest_witness_idx;
+}
-static int
-afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh,
+int
+afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
afr_node_character *characters,
- int child_count)
+ int32_t child_count, int32_t witness)
{
- int i = 0;
- int biggest = 0;
+ int i = 0;
+ int nsources = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_FOOL) {
- biggest = i;
- break;
- }
- }
+ GF_ASSERT (sources);
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
for (i = 0; i < child_count; i++) {
if (characters[i].type != AFR_NODE_FOOL)
continue;
- if (!sh->buf)
- break;
-
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+ if (witness == witnesses[i]) {
+ sources[i] = 1;
+ nsources++;
}
}
+ return nsources;
+}
+
+
+/*
+ * Mark sources[idx] as a source when idx is a valid child index.
+ *
+ * Returns the number of sources marked: 1 when idx lies in
+ * [0, child_count), 0 otherwise (sources is then left untouched).
+ */
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+        /* an out-of-range index (e.g. -1 for "none found") marks nothing */
+        if (idx < 0 || idx >= child_count)
+                return 0;
+
+        sources[idx] = 1;
+        return 1;
+}
+
+
+/*
+ * Pick the child with the largest ia_size among success_children.
+ *
+ * success_children holds child indices; it is scanned until a -1 entry
+ * or until child_count entries have been read, whichever comes first.
+ *
+ * Returns the index of the first child seen with the largest size, or -1
+ * when no child has a non-zero size, or when more than one child was
+ * examined and all of them have the same size (sources then have to be
+ * found from the pending changelog).
+ */
+static int
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+                            int child_count)
+{
+        uint64_t largest  = 0;
+        uint64_t smallest = 0;
+        uint64_t size     = 0;
+        int      examined = 0;
+        int      winner   = -1;
+        int      child    = -1;
+        int      pos      = 0;
+
+        for (pos = 0; pos < child_count; pos++) {
+                if (success_children[pos] == -1)
+                        break;
+
+                child = success_children[pos];
+                size  = bufs[child].ia_size;
+
+                if (size > largest) {
+                        largest = size;
+                        winner  = child;
+                }
+
+                if (examined == 0 || size < smallest)
+                        smallest = size;
+
+                examined++;
+        }
+
+        /* identical sizes everywhere: size cannot break the tie */
+        if (examined > 1 && smallest == largest)
+                return -1;
+
+        return winner;
+}
- sh->sources[biggest] = 1;
- return 1;
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_ctime = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_ctime > max_ctime) {
+ max_ctime = bufs[child].ia_ctime;
+ idx = child;
+ }
+ }
+
+ return idx;
}
static int
-afr_sh_mark_biggest_as_source (afr_self_heal_t *sh, int child_count)
+afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
+{
+ int32_t biggest_witness = 0;
+ int nsources = 0;
+ int32_t *witnesses = NULL;
+
+ GF_ASSERT (child_count > 0);
+
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
+ gf_afr_mt_int32_t);
+ if (NULL == witnesses) {
+ nsources = -1;
+ goto out;
+ }
+
+ afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
+ child_count);
+ biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
+ characters,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
+
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
+out:
+ GF_FREE (witnesses);
+ return nsources;
+}
+
+int
+afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
{
- int biggest = 0;
- int i;
+ int i = 0;
+ int nsources = 0;
+ int child = 0;
for (i = 0; i < child_count; i++) {
- if (!sh->buf)
+ if (-1 == success_children[i])
break;
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+ child = success_children[i];
+ if (uid == bufs[child].ia_uid) {
+ sources[child] = 1;
+ nsources++;
}
}
+ return nsources;
+}
- sh->sources[biggest] = 1;
+int
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
+{
+ int i = 0;
+ int smallest = -1;
+ int child = 0;
- return 1;
+ for (i = 0; i < child_count; i++) {
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
+ if ((smallest == -1) ||
+ (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
+ smallest = child;
+ }
+ }
+ return smallest;
}
-
static int
-afr_sh_mark_loweia_uid_as_source (afr_self_heal_t *sh, int child_count)
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
{
- uid_t smallest = 0;
- int i;
+ int nsources = 0;
+ int smallest = 0;
- for (i = 0; i < child_count; i++) {
- if (!sh->buf)
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
+ child_count);
+ if (smallest < 0) {
+ nsources = -1;
+ goto out;
+ }
+ nsources = afr_mark_child_as_source_by_uid (sources, bufs,
+ success_children, child_count,
+ bufs[smallest].ia_uid);
+out:
+ return nsources;
+}
+
+/*
+ * Return the entry of success_children whose bufs[].ia_size is largest;
+ * on a tie the earliest entry wins.
+ *
+ * success_children is scanned for at most priv->child_count entries and
+ * the scan stops at the first negative entry. Returns -1 when the list
+ * is empty.
+ */
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+                                 struct iatt *bufs)
+{
+        afr_private_t *priv      = this->private;
+        int            best      = -1;
+        int            candidate = -1;
+        int            pos       = 0;
+
+        for (pos = 0; pos < priv->child_count; pos++) {
+                candidate = success_children[pos];
+                if (candidate < 0)
+                        break;
+
+                /* first valid child, or a strictly larger one, takes over */
+                if (best < 0 || bufs[best].ia_size < bufs[candidate].ia_size)
+                        best = candidate;
+        }
+
+        return best;
+}
+
+int
+afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int i = 0;
+ int child = 0;
+ gf_boolean_t sink_exists = _gf_false;
+ gf_boolean_t source_exists = _gf_false;
+ int source = -1;
- if (sh->buf[i].ia_uid < sh->buf[smallest].ia_uid) {
- smallest = i;
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (!bufs[child].ia_size) {
+ sink_exists = _gf_true;
+ continue;
+ }
+ if (!source_exists) {
+ source_exists = _gf_true;
+ source = child;
+ continue;
+ }
+ if (bufs[source].ia_size != bufs[child].ia_size) {
+ nsources = -1;
+ goto out;
}
}
+ if (!source_exists && !sink_exists) {
+ nsources = -1;
+ goto out;
+ }
- sh->sources[smallest] = 1;
+ if (!source_exists || !sink_exists)
+ goto out;
- return 1;
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (bufs[child].ia_size) {
+ sources[child] = 1;
+ nsources++;
+ }
+ }
+out:
+ return nsources;
}
+char *
+afr_get_character_str (afr_node_type type)
+{
+ char *character = NULL;
-int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type)
+ switch (type) {
+ case AFR_NODE_INNOCENT:
+ character = "innocent";
+ break;
+ case AFR_NODE_FOOL:
+ character = "fool";
+ break;
+ case AFR_NODE_WISE:
+ character = "wise";
+ break;
+ default:
+ character = "invalid";
+ break;
+ }
+ return character;
+}
+
+afr_node_type
+afr_find_child_character_type (int32_t *pending_row, int32_t child,
+ unsigned int child_count)
{
- int i = 0;
+ afr_node_type type = AFR_NODE_INVALID;
- int32_t ** pending_matrix;
- int * sources;
+ GF_ASSERT ((child >= 0) && (child < child_count));
- int size_differs = 0;
+ if (afr_sh_is_innocent (pending_row, child_count))
+ type = AFR_NODE_INNOCENT;
+ else if (afr_sh_is_fool (pending_row, child, child_count))
+ type = AFR_NODE_FOOL;
+ else if (afr_sh_is_wise (pending_row, child, child_count))
+ type = AFR_NODE_WISE;
+ return type;
+}
- pending_matrix = sh->pending_matrix;
- sources = sh->sources;
+int
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
- int nsources = 0;
+ priv = this->private;
+ child_count = priv->child_count;
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count,
- gf_afr_mt_afr_node_character) ;
+ if (afr_get_children_count (success_children, priv->child_count) == 0)
+ goto out;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
}
- for (i = 0; i < child_count; i++) {
- if (afr_sh_is_innocent (pending_matrix[i], child_count)) {
- characters[i].type = AFR_NODE_INNOCENT;
+ afr_build_pending_matrix (priv->pending_key, pending_matrix,
+ ignorant_subvols, xattr, type,
+ priv->child_count);
- } else if (afr_sh_is_fool (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_FOOL;
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
+ sh_type = afr_self_heal_type_for_transaction (type);
+ if (AFR_SELF_HEAL_INVALID == sh_type)
+ goto out;
- } else if (afr_sh_is_wise (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_WISE;
+ afr_sh_print_pending_matrix (pending_matrix, this);
- } else {
- gf_log ("[module:replicate]", GF_LOG_ERROR,
- "Could not determine the state of subvolume %d!"
- " (This message should never appear."
- " Please file a bug report to "
- "<gluster-devel@nongnu.org>.)", i);
- }
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
+out:
+ GF_FREE (ignorant_subvols);
+ return nsources;
+}
+
+void
+afr_find_character_types (afr_node_character *characters,
+ int32_t **pending_matrix, int32_t *success_children,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+ int child = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ type = afr_find_child_character_type (pending_matrix[child],
+ child, child_count);
+ characters[child].type = type;
}
+}
- if (type == AFR_SELF_HEAL_DATA) {
- size_differs = afr_sh_mark_if_size_differs (sh, child_count);
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+ unsigned int child_count)
+{
+ int i = 0;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ sources[success_children[i]] = 1;
}
+}
+/**
+ * mark_sources: Mark all 'source' nodes and return number of source
+ * nodes found
+ *
+ * A node (a row in the pending matrix) belongs to one of
+ * three categories:
+ *
+ * M is the pending matrix.
+ *
+ * 'innocent' - M[i] is all zeroes
+ * 'fool' - M[i] has i'th element = 1 (self-reference)
+ * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
+ *
+ * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
+ * needed.
+ *
+ * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
+ * a split-brain. If one wise node refers to the other but the other doesn't
+ * refer back, the referrer is a source.
+ *
+ * All fools are sinks, unless there are no 'wise' nodes. In that case,
+ * one of the fools is made a source.
+ */
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
+int
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
+{
+ /* stores the 'characters' (innocent, fool, wise) of the nodes */
+ afr_node_character *characters = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
- nsources = afr_sh_mark_loweia_uid_as_source (sh, child_count);
+ priv = this->private;
+ child_count = priv->child_count;
+ characters = GF_CALLOC (sizeof (afr_node_character),
+ child_count, gf_afr_mt_afr_node_character);
+ if (!characters)
goto out;
- }
+ this = THIS;
+
+ /* start clean */
+ memset (sources, 0, sizeof (*sources) * child_count);
+ nsources = 0;
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
if (afr_sh_all_nodes_innocent (characters, child_count)) {
- if (size_differs) {
- nsources = afr_sh_mark_biggest_as_source (sh,
- child_count);
+ switch (type) {
+ case AFR_SELF_HEAL_METADATA:
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
+ child_count,
+ sources);
+ break;
+ case AFR_SELF_HEAL_DATA:
+ nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
+ success_children,
+ child_count,
+ sources);
+ if ((nsources < 0) && subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
+ break;
+ default:
+ break;
}
+ goto out;
+ }
- } else if (afr_sh_wise_nodes_exist (characters, child_count)) {
+ if (afr_sh_wise_nodes_exist (characters, child_count)) {
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
-
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
- goto out;
-
} else {
nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
- nsources = afr_sh_mark_biggest_fool_as_source (sh, characters,
- child_count);
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
+ nsources = afr_mark_biggest_of_fools_as_source (sources,
+ pending_matrix,
+ characters,
+ success_children,
+ child_count, bufs);
}
out:
+ if (nsources == 0)
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
GF_FREE (characters);
+ gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
return nsources;
}
-
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
+ int tgt = 0;
+ int src = 0;
+ int value = 0;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
- int ret = 0;
+ afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
+ xattr, type, priv->child_count);
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
+ /*
+ * The algorithm here has two parts. First, for each subvol indexed
+ * as tgt, we try to figure out what count everyone should have for it.
+ * If the self-heal succeeded, that's easy; the value is zero.
+ * Otherwise, the value is the maximum of the succeeding nodes' counts.
+ * Once we know the value, we loop through (possibly for a second time)
+ * setting each count to the difference so that when we're done all
+ * succeeding nodes will have the same count for tgt.
+ */
+ for (tgt = 0; tgt < priv->child_count; ++tgt) {
+ value = 0;
+ if (!success[tgt]) {
+ /* Find the maximum. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (!success[src]) {
+ continue;
+ }
+ if (delta_matrix[src][tgt] > value) {
+ value = delta_matrix[src][tgt];
+ }
+ }
}
- }
-
- for (i = 0; i < child_count; i++) {
- if (pending_raw)
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
- if (ret < 0)
- gf_log ("afr_sh_pending_to_delta",
- GF_LOG_DEBUG,
- "Unable to get dict value.");
- if (!success[j])
- continue;
-
- k = afr_index_for_transaction_type (type);
-
- if (pending_raw != NULL) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
+ /* Force everyone who succeeded to the chosen value. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (success[src]) {
+ delta_matrix[src][tgt] = value
+ - delta_matrix[src][tgt];
+ }
+ else {
+ delta_matrix[src][tgt] = 0;
}
-
}
}
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
-
- int ret = 0;
-
- int32_t *pending = 0;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
for (i = 0; i < child_count; i++) {
if (!xattr[i])
continue;
+ local_pending = NULL;
for (j = 0; j < child_count; j++) {
pending = GF_CALLOC (sizeof (int32_t), 3,
gf_afr_mt_int32_t);
+
+ if (!pending) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate pending entry "
+ "for %s[%d] on %s",
+ priv->pending_key[j], type,
+ priv->children[i]->name);
+ continue;
+ }
/* 3 = data+metadata+entry */
k = afr_index_for_transaction_type (type);
pending[k] = hton32 (delta_matrix[i][j]);
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
ret = dict_set_bin (xattr[i], priv->pending_key[j],
pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log ("afr_sh_delta_to_xattr",
- GF_LOG_WARNING,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (pending);
+ }
+ }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
"Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
}
}
return 0;
@@ -640,708 +1136,937 @@ afr_sh_delta_to_xattr (afr_private_t *priv,
int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
-
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ local = frame->local;
+ sh = &local->self_heal;
- if (ret != 0)
- return 0;
+ afr_sh_reset (frame, this);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (local->unhealable) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain found, aborting selfheal of %s",
+ local->loc.path);
+ }
- if (pending[j])
- return 1;
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh->completion_cbk (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to metadata check on %s",
+ local->loc.path);
+ afr_self_heal_metadata (frame, this);
}
return 0;
}
-int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)
+static int
+afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
-
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ local = frame->local;
+ int_lock = &local->internal_lock;
- if (pending[j])
- return 1;
- }
+ int_lock->lock_cbk = afr_sh_missing_entries_done;
+ afr_unlock (frame, this);
return 0;
}
-
/*
 * afr_sh_common_create - allocate the per-child arrays of a self-heal
 * context.
 *
 * @sh:          self-heal context whose array members are filled in.
 * @child_count: number of elements requested for every array.
 *
 * Allocates, in this order: sh->buf, sh->parentbufs, sh->child_errno,
 * sh->success_children, sh->fresh_children and sh->xattr.
 *
 * Returns 0 when every allocation succeeds and -ENOMEM as soon as one of
 * them fails. Arrays allocated before the failing one stay attached to @sh;
 * nothing is released here.
 * NOTE(review): presumably the cleanup of the owning local frees them -
 * confirm against the callers.
 */
int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
{
- afr_private_t *priv = NULL;
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3];
- void *pending_raw = NULL;
/* Stays -ENOMEM until the last allocation below has succeeded. */
+ int ret = -ENOMEM;
+ sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
+ gf_afr_mt_iatt);
+ if (!sh->buf)
+ goto out;
+ sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
+ gf_afr_mt_iatt);
+ if (!sh->parentbufs)
+ goto out;
+ sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
+ gf_afr_mt_int);
+ if (!sh->child_errno)
+ goto out;
+ sh->success_children = afr_children_create (child_count);
+ if (!sh->success_children)
+ goto out;
+ sh->fresh_children = afr_children_create (child_count);
+ if (!sh->fresh_children)
+ goto out;
+ sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
+ gf_afr_mt_dict_t);
+ if (!sh->xattr)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
- int ret = -1;
- int i = 0;
- int j = 0;
/*
 * afr_sh_common_lookup_resp_handler - record the lookup reply of one child
 * in the self-heal context of frame->local.
 *
 * @cookie: child index, passed as a pointer-sized integer.
 * @op_ret/@op_errno: result of the lookup on that child.
 * @buf, @postparent, @xattr: reply data, read only when op_ret == 0.
 * @loc: location that was looked up; only loc->path is read, for logging.
 * @inode: not used by this function.
 *
 * On success the reply iatt and postparent iatt are copied into the slot of
 * that child, the child index is appended to sh->success_children, and the
 * result of dict_ref (xattr) is stored in sh->xattr. On failure the error
 * is logged and op_errno is stored in child_errno for that child.
 */
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc)
+{
+ int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
priv = this->private;
+ sh = &local->self_heal;
+ child_index = (long) cookie;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
-
- if (ret != 0)
- return 0;
-
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
-
- if (pending[j])
- return 1;
/* The self-heal context is modified only while frame->lock is held. */
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ sh->buf[child_index] = *buf;
+ sh->parentbufs[child_index] = *postparent;
/* success_count doubles as the next free slot in success_children. */
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ sh->xattr[child_index] = dict_ref (xattr);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
+ " %s => -1 (%s)", loc->path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ local->self_heal.child_errno[child_index] = op_errno;
+ }
}
-
- return 0;
+ UNLOCK (&frame->lock);
+ return;
}
-
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
/*
 * afr_valid_ia_type - tell whether @ia_type is one of the file types
 * enumerated in the switch below (socket, regular file, block device,
 * character device, fifo, symlink, directory).
 *
 * Returns _gf_true for those seven types and _gf_false for any other value.
 */
+gf_boolean_t
+afr_valid_ia_type (ia_type_t ia_type)
+{
+ switch (ia_type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ case IA_IFDIR:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
/* Not reached: every path through the switch above returns. */
+ return _gf_false;
+}
int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame)
{
- int i, j;
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ call_frame_t *new_frame = NULL;
+
+ op_errno = ENOMEM;
+ priv = this->private;
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
-}
+ AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
+ local = frame->local;
+ new_frame->local = impunge_local;
+ impunge_sh = &impunge_local->self_heal;
+ impunge_sh->sh_frame = frame;
+ impunge_sh->active_source = active_source;
+ impunge_local->child_up = memdup (local->child_up,
+ sizeof (*local->child_up) *
+ priv->child_count);
+ if (!impunge_local->child_up)
+ goto out;
-int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+ impunge_local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!impunge_local->pending)
+ goto out;
+
+ ret = afr_sh_common_create (impunge_sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
+ op_errno = 0;
+ *impunge_frame = new_frame;
+out:
+ if (op_errno && new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ return -op_errno;
+}
+
+void
+afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
{
+ call_frame_t *impunge_frame = NULL;
afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int ret = 0;
+ unsigned int enoent_count = 0;
afr_private_t *priv = NULL;
- int i = 0;
+ int i = 0;
+ int32_t op_errno = 0;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
+ sh = &local->self_heal;
+ priv = this->private;
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (!enoent_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
+ goto out;
+ }
+ sh->impunge_done = impunge_done;
+ ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
+ if (ret)
+ goto out;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc_copy (&impunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&impunge_sh->parent_loc,
+ &impunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ impunge_local->call_count = enoent_count;
+ impunge_sh->entrybuf = sh->buf[sh->source];
+ impunge_sh->parentbuf = sh->parentbufs[sh->source];
for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (sh->child_errno[i] != ENOENT) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
}
-
for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_entry_impunge_create (impunge_frame, this, i);
+ enoent_count--;
}
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_TRACE,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
+ GF_ASSERT (!enoent_count);
+ return;
+out:
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
+ "reason: %s", local->loc.path, strerror (-ret));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
-
- return 0;
+ afr_sh_missing_entries_finish (frame, this);
}
-
-static int
-sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
/*
 * afr_sh_create_entry_cbk - completion callback handed to
 * afr_sh_missing_entry_call_impunge_recreate by sh_missing_entries_create.
 *
 * Marks the self-heal of frame->local as AFR_SELF_HEAL_FAILED when
 * op_ret < 0, then always continues with afr_sh_missing_entries_finish.
 * @op_errno is not inspected. Always returns 0.
 */
+int
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
-
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
/* Success or failure, the missing-entry phase ends here. */
+ afr_sh_missing_entries_finish (frame, this);
return 0;
}
-
static int
-sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop)
+sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
-
- loc_t *parent_loc = cookie;
-
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int type = 0;
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
local = frame->local;
+ sh = &local->self_heal;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr on %s failed: %s",
- local->loc.path, strerror (op_errno));
- }
-
- if (parent_loc) {
- loc_wipe (parent_loc);
- GF_FREE (parent_loc);
- }
-
- call_count = afr_frame_return (frame);
+ buf = &sh->buf[sh->source];
+ postparent = &sh->parentbufs[sh->source];
- if (call_count == 0) {
- STACK_DESTROY (frame->root);
+ type = buf->ia_type;
+ if (!afr_valid_ia_type (type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unknown file type: 0%o", local->loc.path, type);
+ afr_set_local_for_unhealable (local);
+ afr_sh_missing_entries_finish (frame, this);
+ goto out;
}
+ afr_sh_missing_entry_call_impunge_recreate (frame, this,
+ buf, postparent,
+ afr_sh_create_entry_cbk);
+out:
return 0;
}
-
-static int
-sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
+void
+afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- call_frame_t *setattr_frame = NULL;
- int call_count = 0;
- int child_index = 0;
-
- loc_t *parent_loc = NULL;
-
- struct iatt stbuf;
- int32_t valid;
+ ia_type_t ia_type = IA_INVAL;
+ int32_t nsources = 0;
+ loc_t *loc = NULL;
+ int32_t subvol_status = 0;
+ afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
+ gf_boolean_t split_brain = _gf_false;
+ int read_child = -1;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- stbuf.ia_atime = sh->buf[sh->source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[sh->source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[sh->source].ia_uid;
- stbuf.ia_gid = sh->buf[sh->source].ia_gid;
+ sh = &local->self_heal;
+ priv = this->private;
+ loc = &local->loc;
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ if (op_ret < 0) {
+ if (op_errno == EIO) {
+ afr_set_local_for_unhealable (local);
+ }
+ // EIO can happen if finding the fresh parent dir failed
+ goto out;
+ }
- if (op_ret == 0) {
- setattr_frame = copy_frame (frame);
+ //now No chance for the ia_type to conflict
+ ia_type = sh->buf[sh->success_children[0]].ia_type;
+ txn_type = afr_transaction_type_get (ia_type);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (nsources < 0) {
+ gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ if (subvol_status & SPLIT_BRAIN) {
+ split_brain = _gf_true;
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ nsources = 1;
+ sh->sources[sh->success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child
+ (this,
+ sh->success_children,
+ sh->buf);
+ sh->sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ op_errno = EIO;
+ goto out;
+ }
+ } else {
+ op_errno = EIO;
+ goto out;
+ }
+ }
- setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ sh->source = sh->fresh_children[0];
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
+ }
- ((afr_local_t *)setattr_frame->local)->call_count = 2;
+ if (sh->gfid_sh_success_cbk)
+ sh->gfid_sh_success_cbk (frame, this);
+ sh->type = sh->buf[sh->source].ia_type;
+ if (uuid_is_null (loc->inode->gfid))
+ uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
+ if (split_brain) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ sh_missing_entries_create (frame, this);
+ }
+ return;
+out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "setattr (%s) on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+static int
+afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) 0,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &local->loc, &stbuf, valid);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- afr_build_parent_loc (parent_loc, &local->loc);
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent, &sh->lookup_loc);
+ call_count = afr_frame_return (frame);
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &sh->parentbuf, valid);
+ if (call_count)
+ goto out;
+ op_ret = -1;
+ if (!sh->success_count) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
+ "reason %s", sh->lookup_loc.path,
+ strerror (op_errno));
+ goto done;
}
- call_count = afr_frame_return (frame);
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
+ (afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count,
+ sh->lookup_loc.path, this->name))) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
- if (call_count == 0) {
- sh_missing_entries_finish (frame, this);
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
+ (afr_gfid_missing_count (this->name, sh->success_children,
+ sh->buf, priv->child_count,
+ sh->lookup_loc.path))) {
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
+ "for %s", sh->lookup_loc.path);
+ goto done;
}
+ op_ret = 0;
+done:
+ sh->lookup_done (frame, this, op_ret, op_errno);
+out:
return 0;
}
-
-static int
-sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
+int
+afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+ int32_t op_ret, int32_t op_errno)
{
+ int call_count = 0;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
- dev_t ia_rdev = 0;
- dict_t *dict = NULL;
- dev_t st_rdev = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
- ia_rdev = sh->buf[sh->source].ia_rdev;
- st_rdev = makedev (ia_major (ia_rdev), ia_minor (ia_rdev));
-
- gf_log (this->name, GF_LOG_TRACE,
- "mknod %s mode 0%o device type %"PRId64" on %d subvolumes",
- local->loc.path, st_mode, (uint64_t)st_rdev, enoent_count);
-
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
-
- ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "gfid set failed");
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, st_mode, st_rdev, dict);
- if (!--call_count)
- break;
+ GF_ASSERT (sh->post_remove_call);
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "purge entry %s failed, on child %d reason, %s",
+ local->loc.path, child, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_sh_set_error (sh, EIO);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
+ UNLOCK (&frame->lock);
}
-
- if (dict)
- dict_unref (dict);
-
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ sh->post_remove_call (frame, this);
return 0;
}
-
-static int
-sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
+/*
+ * afr_sh_call_entry_expunge_remove - start the removal of an entry on one
+ * child through a dedicated expunge frame copied from @frame.
+ *
+ * @frame:        self-heal frame; its local supplies the loc to remove.
+ * @child_index:  child on which the entry is removed.
+ * @buf:          iatt of the entry, forwarded to afr_sh_entry_expunge_remove.
+ * @parentbuf:    iatt of the parent, forwarded likewise.
+ * @expunge_done: completion callback; stored in sh->expunge_done on the
+ *                success path and invoked directly with op_ret -1 when the
+ *                setup fails.
+ *
+ * Failure handling: the error path can be entered before any state has
+ * been set up, so it reloads local from @frame, always reports a non-zero
+ * errno, and releases the expunge frame that was never wound.
+ */
+void
+afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ struct iatt *parentbuf,
+ afr_expunge_done_cbk_t expunge_done)
{
+ call_frame_t *expunge_frame = NULL;
afr_local_t *local = NULL;
+ afr_local_t *expunge_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
+ afr_self_heal_t *expunge_sh = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+ /* The two allocations below can only fail for lack of memory; preset
+ * the errno so a failed copy_frame is not reported with errno 0. */
+ op_errno = ENOMEM;
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ loc_copy (&expunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&expunge_sh->parent_loc,
+ &expunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ sh->expunge_done = expunge_done;
+ afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
+ parentbuf);
+ return;
+out:
+ /* This label is reachable before local was assigned above; reload it
+ * so the log message never dereferences a NULL pointer. */
+ local = frame->local;
+ gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
+ local->loc.path, strerror (op_errno));
+ /* The expunge frame was never wound, so nothing else will free it. */
+ if (expunge_frame)
+ AFR_STACK_DESTROY (expunge_frame);
+ expunge_done (frame, this, child_index, -1, op_errno);
+}
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
/*
 * afr_sh_remove_stale_lookup_info - discard the lookup results of children
 * that answered the lookup but are not considered fresh.
 *
 * For every child index that is present in @success_children and absent
 * from @fresh_children: child_errno is set to ENOENT, the xattr dict of
 * that child is unreferenced, and its slot in sh->xattr is cleared.
 *
 * @child_count bounds both the loop and the membership tests.
 */
+void
+afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
+ int32_t *fresh_children,
+ unsigned int child_count)
+{
+ int i = 0;
- call_count = enoent_count;
- local->call_count = call_count;
+ for (i = 0; i < child_count; i++) {
+ if (afr_is_child_present (success_children, child_count, i) &&
+ !afr_is_child_present (fresh_children, child_count, i)) {
+ sh->child_errno[i] = ENOENT;
/* A child listed in success_children is expected to hold a dict. */
+ GF_ASSERT (sh->xattr[i]);
+ dict_unref (sh->xattr[i]);
+ sh->xattr[i] = NULL;
+ }
+ }
+}
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
+int
+afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- sh_missing_entries_finish (frame, this);
- return 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ if (afr_gfid_missing_count (this->name, sh->fresh_children,
+ sh->buf, priv->child_count,
+ local->loc.path)) {
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_missing_entries_lookup_done,
+ sh->sh_gfid_req,
+ AFR_LOOKUP_FAIL_CONFLICTS|
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ } else {
+ //No need to set gfid so goto missing entries lookup done
+ //Behave as if you have done the lookup
+ afr_sh_remove_stale_lookup_info (sh,
+ sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_children_copy (sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
+ }
}
+ return 0;
+}
- ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "inode gfid set failed");
/*
 * afr_sh_purge_entry_condition - predicate passed to
 * afr_sh_purge_entry_common by afr_sh_purge_entry.
 *
 * Returns _gf_true when all three hold for @child:
 *   - local->child_up marks it as up,
 *   - it is not listed in sh->fresh_parent_dirs,
 *   - its recorded child_errno is not ENOENT.
 * Returns _gf_false otherwise.
 */
+gf_boolean_t
+afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
+{
+ afr_self_heal_t *sh = NULL;
+ sh = &local->self_heal;
- gf_log (this->name, GF_LOG_TRACE,
- "mkdir %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- if (!strcmp (local->loc.path, "/")) {
- /* We shouldn't try to create "/" */
+ return _gf_false;
+}
- sh_missing_entries_finish (frame, this);
/*
 * afr_sh_purge_stale_entry_condition - predicate passed to
 * afr_sh_purge_entry_common by afr_sh_purge_stale_entry.
 *
 * Same test as afr_sh_purge_entry_condition, except that membership is
 * checked against sh->fresh_children instead of sh->fresh_parent_dirs.
 * Returns _gf_true when @child is up, is not listed in sh->fresh_children,
 * and its recorded child_errno is not ENOENT; _gf_false otherwise.
 */
+gf_boolean_t
+afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
+{
+ afr_self_heal_t *sh = NULL;
- return 0;
- } else {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, st_mode, dict);
- if (!--call_count)
- break;
- }
- }
- }
+ sh = &local->self_heal;
- if (dict)
- dict_unref (dict);
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_children, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- return 0;
+ return _gf_false;
}
-
-static int
-sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
- const char *link, struct iatt *buf)
+void
+afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t purge_condition (afr_local_t *local,
+ afr_private_t *priv,
+ int child))
{
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
-
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- sh_missing_entries_finish (frame, this);
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (purge_condition (local, priv, i))
+ call_count++;
}
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict gfid set failed");
-
- gf_log (this->name, GF_LOG_TRACE,
- "symlink %s -> %s on %d subvolumes",
- local->loc.path, link, enoent_count);
+ if (call_count == 0) {
+ sh->post_remove_call (frame, this);
+ goto out;
+ }
+ local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- link, &local->loc, dict);
- if (!--call_count)
- break;
- }
+ if (!purge_condition (local, priv, i))
+ continue;
+ gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
+ "on %s", local->loc.path, priv->children[i]->name);
+ afr_sh_call_entry_expunge_remove (frame, this,
+ (long) i, &sh->buf[i],
+ &sh->parentbufs[i],
+ afr_sh_remove_entry_cbk);
}
-
- return 0;
-}
-
-
-static int
-sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *link, struct iatt *sbuf)
-{
- if (op_ret > 0)
- sh_missing_entries_symlink (frame, this, link, sbuf);
- else
- sh_missing_entries_finish (frame, this);
-
- return 0;
+out:
+ return;
}
-
-static int
-sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
+ sh->post_remove_call = afr_sh_missing_entries_finish;
- STACK_WIND (frame, sh_missing_entries_readlink_cbk,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readlink,
- &local->loc, 4096);
-
- return 0;
+ afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
}
-
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- int type = 0;
- int i = 0;
afr_private_t *priv = NULL;
- int enoent_count = 0;
- int govinda_gOvinda = 0;
-
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ sh->post_remove_call = afr_sh_purge_stale_entries_done;
+
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
+ if (afr_is_child_present (sh->fresh_children,
+ priv->child_count, i))
continue;
- if (sh->child_errno[i]) {
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
- } else {
- if (type) {
- if (type != sh->buf[i].ia_type) {
- gf_log (this->name, GF_LOG_TRACE,
- "file %s is govinda!",
- local->loc.path);
+ if ((!local->child_up[i]) || sh->child_errno[i] != 0)
+ continue;
- govinda_gOvinda = 1;
- }
- } else {
- sh->source = i;
- type = sh->buf[i].ia_type;
- }
- }
- }
+ GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
+ uuid_is_null (sh->buf[i].ia_gfid));
- if (govinda_gOvinda) {
- gf_log (this->name, GF_LOG_ERROR,
- "conflicting filetypes exist for path %s. returning.",
- local->loc.path);
+ if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
+ (uuid_compare (sh->buf[i].ia_gfid,
+ sh->entrybuf.ia_gfid)))
+ continue;
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
- if (!type) {
- gf_log (this->name, GF_LOG_ERROR,
- "no source found for %s. all nodes down?. returning.",
- local->loc.path);
- /* subvolumes down and/or file does not exist */
- sh_missing_entries_finish (frame, this);
- return 0;
}
+ afr_sh_purge_entry_common (frame, this,
+ afr_sh_purge_stale_entry_condition);
+}
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+void
+afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
+ struct iatt *save,
+ unsigned int child_count)
+{
+ int i = 0;
+ int child = 0;
+ gf_boolean_t saved = _gf_false;
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- sh_missing_entries_mknod (frame, this);
- break;
- case IA_IFLNK:
- sh_missing_entries_readlink (frame, this);
- break;
- case IA_IFDIR:
- sh_missing_entries_mkdir (frame, this);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown file type: 0%o", type);
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
+ GF_ASSERT (save);
+ //if iatt buf with gfid exists sets it
+ for (i = 0; i < child_count; i++) {
+ child = children[i];
+ if (child == -1)
+ break;
+ *save = bufs[child];
+ saved = _gf_true;
+ if (!uuid_is_null (save->ia_gfid))
+ break;
}
-
- return 0;
+ GF_ASSERT (saved);
}
/*
 * afr_get_children_of_fresh_parent_dirs - rebuild sh->fresh_children from
 * sh->success_children and sh->fresh_parent_dirs.
 *
 * sh->sources is used as scratch space: it receives the output of
 * afr_children_intersection_get, is then read by afr_get_fresh_children,
 * and is zeroed again before returning.
 * NOTE(review): presumably the intersection marks in sh->sources the
 * children common to both input lists - the helpers are not visible here,
 * confirm against their definitions.
 */
+void
+afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
+ unsigned int child_count)
+{
+ afr_children_intersection_get (sh->success_children,
+ sh->fresh_parent_dirs,
+ sh->sources, child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, child_count);
/* Leave sh->sources cleared for whoever uses it next. */
+ memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
+}
-static int
-sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+void
+afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- int child_index = 0;
- afr_local_t *local = NULL;
- int call_count = 0;
- afr_private_t *priv = NULL;
- mode_t st_mode = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t fresh_child_enoents = 0;
+ int32_t fresh_parent_count = 0;
local = frame->local;
+ sh = &local->self_heal;
priv = this->private;
- child_index = (long) cookie;
+ if (op_ret < 0)
+ goto fail;
+ afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
+ fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
+ priv->child_count);
+ //we need the enoent count of the subvols present in fresh_parent_dirs
+ fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
+ sh->child_errno,
+ priv->child_count, ENOENT);
+ if (fresh_child_enoents == fresh_parent_count) {
+ afr_sh_set_error (sh, ENOENT);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_purge_entry (frame, this);
+ } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ afr_sh_save_child_iatts_from_policy (sh->fresh_children,
+ sh->buf, &sh->entrybuf,
+ priv->child_count);
+ afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_purge_stale_entry (frame, this);
+ } else {
+ op_errno = EIO;
+ afr_set_local_for_unhealable (local);
+ goto fail;
+ }
- if (buf)
- st_mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
+ return;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- st_mode);
+fail:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
- local->self_heal.buf[child_index] = *buf;
- local->self_heal.parentbuf = *postparent;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+static void
+afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int enoent_count = 0;
+ int nsources = 0;
+ int source = -1;
+ int32_t subvol_status = 0;
- local->self_heal.child_errno[child_index] = op_errno;
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+ if (op_ret < 0)
+ goto out;
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (enoent_count > 0) {
+ gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
+ " in missing entry self-heal, aborting missing-entry "
+ "self-heal",
+ local->loc.path);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->success_children,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
- if (call_count == 0) {
- sh_missing_entries_create (frame, this);
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (source == -1) {
+ GF_ASSERT (0);
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
}
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_parent_dirs, priv->child_count);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_children_lookup_done, NULL, 0,
+ NULL);
+ return;
- return 0;
+out:
+ afr_sh_set_error (sh, op_errno);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
/*
 * afr_sh_common_reset - clear the lookup results held in a self-heal
 * context.
 *
 * For each of the first @child_count children the reply iatt, the parent
 * iatt and child_errno are zeroed. It also zeroes sh->parentbuf and
 * sh->success_count, resets the success_children and fresh_children lists
 * and the xattr array through their reset helpers, and wipes
 * sh->lookup_loc.
 */
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+{
+ int i = 0;
-static int
-sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
+ for (i = 0; i < child_count; i++) {
+ memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
+ memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
+ sh->child_errno[i] = 0;
+ }
+ memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
+ sh->success_count = 0;
+ afr_reset_children (sh->success_children, child_count);
+ afr_reset_children (sh->fresh_children, child_count);
+ afr_reset_xattr (sh->xattr, child_count);
+ loc_wipe (&sh->lookup_loc);
+}
+
+/* WARNING: the afr self-heal state is lost when this function is called;
+ * see the afr_sh_common_reset() call made inside this function.
+ */
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
+ int32_t flags, dict_t *xdata)
{
afr_local_t *local = NULL;
int i = 0;
int call_count = 0;
afr_private_t *priv = NULL;
dict_t *xattr_req = NULL;
- int ret = -1;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
priv = this->private;
+ sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
xattr_req = dict_new();
if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
+ afr_xattr_req_prepare (this, xattr_req, loc->path);
+ if (gfid) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s with gfid: %s",
+ loc->path, uuid_utoa (gfid));
+ GF_ASSERT (!uuid_is_null (gfid));
+ afr_set_dict_gfid (xattr_req, gfid);
}
}
+ afr_sh_common_reset (sh, priv->child_count);
+ sh->lookup_done = lookup_done;
+ loc_copy (&sh->lookup_loc, loc);
+ sh->lookup_flags = flags;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ loc->path, priv->children[i]->name);
STACK_WIND_COOKIE (frame,
- sh_missing_entries_lookup_cbk,
+ afr_sh_common_lookup_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
+ loc, xattr_req);
if (!--call_count)
break;
@@ -1357,111 +2082,162 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
+/*
+ * Lock callback run after the non-blocking entrylk attempt of the
+ * missing-entry self-heal.  If int_lock->lock_op_ret is negative the
+ * self-heal status is set to AFR_SELF_HEAL_FAILED and
+ * afr_sh_missing_entries_done() is called; otherwise a lookup of
+ * sh->parent_loc is issued through afr_sh_common_lookup() with
+ * afr_sh_find_fresh_parents as the lookup-done handler and the
+ * AFR_LOOKUP_FAIL_CONFLICTS flag.  Always returns 0.
+ */
int
-afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
+ xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"Non blocking entrylks done. Proceeding to FOP");
- sh_missing_entries_lookup (frame, this);
+ afr_sh_common_lookup (frame, this, &sh->parent_loc,
+ afr_sh_find_fresh_parents,
+ NULL, AFR_LOOKUP_FAIL_CONFLICTS,
+ NULL);
}
return 0;
}
-static int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this)
+/*
+ * Start a non-blocking self-heal entry lock on @loc / @base_name.
+ * The frame's internal-lock state is filled in (AFR_SELFHEAL_LK /
+ * AFR_ENTRY_SELF_HEAL_LK, lock number, basename, loc, domain = this->name),
+ * a single lockee is initialised in lockee[0], @lock_cbk is stored as the
+ * lock callback, and afr_nonblocking_entrylk() is invoked.  Always
+ * returns 0.
+ */
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- sh = &local->self_heal;
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_basename = local->loc.name;
- int_lock->lk_loc = &sh->parent_loc;
- int_lock->lock_cbk = afr_sh_post_nonblocking_entrylk_cbk;
+ int_lock->lk_basename = base_name;
+ int_lock->lk_loc = loc;
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
+ int_lock->lockee_count = 0;
+ afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
+ base_name, priv->child_count);
+ int_lock->lockee_count++;
afr_nonblocking_entrylk (frame, this);
return 0;
}
+/*
+ * Build sh->parent_loc from local->loc and take the self-heal entry lock on
+ * it (basename NULL) via afr_sh_entrylk(), passing @lock_cbk as the lock
+ * callback.  If afr_build_parent_loc() fails, no lock is attempted:
+ * int_lock->lock_op_ret is set to -1 and @lock_cbk is called directly so the
+ * caller's failure path runs.  Always returns 0.
+ */
static int
-afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk)
{
- afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
local = frame->local;
- int_lock = &local->internal_lock;
sh = &local->self_heal;
- priv = this->private;
gf_log (this->name, GF_LOG_TRACE,
"attempting to recreate missing entries for path=%s",
local->loc.path);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
+ if (ret)
+ goto out;
- afr_sh_entrylk (frame, this);
+ afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
+ lock_cbk);
+ return 0;
+out:
+ int_lock = &local->internal_lock;
+ int_lock->lock_op_ret = -1;
+ lock_cbk (frame, this);
return 0;
}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+/*
+ * Entry point of the gfid / missing-entry self-heal phase: records
+ * AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY as the type in action, marks that
+ * phase AFR_SELF_HEAL_STARTED and requests the parent entry lock with
+ * afr_sh_post_nb_entrylk_missing_entry_sh_cbk as the lock callback.
+ * Always returns 0.
+ */
+static int
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
+ afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+
+ afr_self_heal_parent_entrylk (frame, this,
+ afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
+ return 0;
+}
+
+afr_local_t*
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
priv = this->private;
sh = &l->self_heal;
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
+ lc = mem_get0 (this->local_pool);
+ if (!lc)
+ goto out;
shc = &lc->self_heal;
shc->unwind = sh->unwind;
- shc->need_data_self_heal = sh->need_data_self_heal;
- shc->need_metadata_self_heal = sh->need_metadata_self_heal;
- shc->need_entry_self_heal = sh->need_entry_self_heal;
+ shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
+ shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
+ shc->do_gfid_self_heal = sh->do_gfid_self_heal;
+ shc->do_data_self_heal = sh->do_data_self_heal;
+ shc->do_metadata_self_heal = sh->do_metadata_self_heal;
+ shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
shc->forced_merge = sh->forced_merge;
- shc->healing_fd_opened = sh->healing_fd_opened;
- shc->data_lock_held = sh->data_lock_held;
- if (sh->healing_fd && !sh->healing_fd_opened)
- shc->healing_fd = fd_ref (sh->healing_fd);
- else
- shc->healing_fd = sh->healing_fd;
shc->background = sh->background;
shc->type = sh->type;
+ shc->data_sh_info = "";
+ shc->metadata_sh_info = "";
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
+ uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
+
+ lc->child_up = memdup (l->child_up,
+ sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
- lc->child_up = memdup (l->child_up, priv->child_count);
if (l->xattr_req)
lc->xattr_req = dict_ref (l->xattr_req);
@@ -1469,39 +2245,25 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
if (l->cont.lookup.xattr)
lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
+out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
return lc;
}
@@ -1511,29 +2273,39 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
+ afr_local_t * orig_frame_local = NULL;
+ afr_self_heal_t * orig_frame_sh = NULL;
char sh_type_str[256] = {0,};
+ gf_loglevel_t loglevel = 0;
priv = this->private;
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode,
- _gf_true);
+ if (local->unhealable) {
+ afr_set_split_brain (this, sh->inode, SPB, SPB);
+ }
+
+ afr_self_heal_type_str_get (sh, sh_type_str,
+ sizeof(sh_type_str));
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
+ loglevel = GF_LOG_ERROR;
+ } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) {
+ loglevel = GF_LOG_INFO;
} else {
- afr_set_split_brain (this, local->cont.lookup.inode,
- _gf_false);
+ loglevel = GF_LOG_DEBUG;
}
- afr_self_heal_type_str_get(sh, sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_NORMAL,
- "background %s self-heal completed on %s", sh_type_str,
- local->loc.path);
+ afr_log_self_heal_completion_status (local, loglevel);
+
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
- if (!sh->unwound) {
- sh->unwind (sh->orig_frame, this);
+ if (!sh->unwound && sh->unwind) {
+ orig_frame_local = sh->orig_frame->local;
+ orig_frame_sh = &orig_frame_local->self_heal;
+ orig_frame_sh->actual_sh_started = _gf_true;
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -1550,98 +2322,130 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
}
+/*
+ * Launch a self-heal for local->loc on a copy of @frame.
+ *
+ * A new frame (sh_frame) and a fresh afr_local_t are created, the
+ * self-heal bookkeeping arrays and matrices are allocated, and @inode is
+ * referenced into sh->inode.  Depending on the do_missing_entry_self_heal /
+ * do_gfid_self_heal flags the heal starts with the missing-entry phase or
+ * goes straight to afr_sh_missing_entries_done().
+ *
+ * If any setup step fails, op_errno is still non-zero at "out", the
+ * original frame's unwind callback is called with (-1, op_errno, 1) and the
+ * copied frame is destroyed.  Always returns 0.
+ */
int
-afr_self_heal (call_frame_t *frame, xlator_t *this)
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
-
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
-
+ int32_t op_errno = 0;
+ int ret = 0;
+ afr_self_heal_t *orig_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ loc_t *loc = NULL;
local = frame->local;
+ orig_sh = &local->self_heal;
priv = this->private;
GF_ASSERT (local->loc.path);
- afr_set_lk_owner (frame, this);
-
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- > priv->background_self_heal_count) {
-
- local->self_heal.background = _gf_false;
-
- } else {
- priv->background_self_heals_started++;
- }
- }
- UNLOCK (&priv->lock);
- }
-
gf_log (this->name, GF_LOG_TRACE,
"performing self heal on %s (metadata=%d data=%d entry=%d)",
local->loc.path,
- local->self_heal.need_metadata_self_heal,
- local->self_heal.need_data_self_heal,
- local->self_heal.need_entry_self_heal);
+ local->self_heal.do_metadata_self_heal,
+ local->self_heal.do_data_self_heal,
+ local->self_heal.do_entry_self_heal);
+ /* Every allocation failure below reports ENOMEM unless overridden. */
+ op_errno = ENOMEM;
sh_frame = copy_frame (frame);
- sh_local = afr_local_copy (local, this);
+ if (!sh_frame)
+ goto out;
+ afr_set_lk_owner (sh_frame, this, sh_frame->root);
+ afr_set_low_priority (sh_frame);
+
+ sh_local = afr_self_heal_local_init (local, this);
+ if (!sh_local)
+ goto out;
sh_frame->local = sh_local;
sh = &sh_local->self_heal;
+ sh->inode = inode_ref (inode);
sh->orig_frame = frame;
sh->completion_cbk = afr_self_heal_completion_cbk;
-
- sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt),
- gf_afr_mt_iatt);
- sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->success = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
- sh->xattr = GF_CALLOC (priv->child_count, sizeof (dict_t *),
- gf_afr_mt_dict_t);
+ sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
+ gf_afr_mt_char);
+ if (!sh->success)
+ goto out;
sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
gf_afr_mt_int);
+ if (!sh->sources)
+ goto out;
sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
priv->child_count,
gf_afr_mt_int);
+ if (!sh->locked_nodes)
+ goto out;
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
+ sh->pending_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->pending_matrix)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
+ sh->delta_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->delta_matrix)
+ goto out;
+
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
+ if (!sh->fresh_parent_dirs)
+ goto out;
+ ret = afr_sh_common_create (sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
}
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
+ /* Run in the background only while the started counter is below
+ * background_self_heal_count; otherwise clear the background flag
+ * on both the original and the self-heal state. */
+ if (local->self_heal.background) {
+ LOCK (&priv->lock);
+ {
+ if (priv->background_self_heals_started
+ < priv->background_self_heal_count) {
+ priv->background_self_heals_started++;
+
+
+ } else {
+ local->self_heal.background = _gf_false;
+ sh->background = _gf_false;
+ }
+ }
+ UNLOCK (&priv->lock);
}
+ /* Without a parent inode the missing-entry and gfid phases are
+ * switched off. */
+ if (!local->loc.parent) {
+ sh->do_missing_entry_self_heal = _gf_false;
+ sh->do_gfid_self_heal = _gf_false;
+ }
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
+
FRAME_SU_DO (sh_frame, afr_local_t);
- if (local->success_count && local->enoent_count) {
+ if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
afr_self_heal_missing_entries (sh_frame, this);
} else {
+ loc = &sh_local->loc;
+ if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
+ if (!uuid_is_null (inode->gfid))
+ GF_ASSERT (!uuid_compare (inode->gfid,
+ sh->sh_gfid_req));
+ uuid_copy (loc->gfid, sh->sh_gfid_req);
+ }
gf_log (this->name, GF_LOG_TRACE,
"proceeding to metadata check on %s",
local->loc.path);
afr_sh_missing_entries_done (sh_frame, this);
}
+ /* Setup succeeded: clear op_errno so the error path below is skipped. */
+ op_errno = 0;
+out:
+ if (op_errno) {
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
+ if (sh_frame)
+ AFR_STACK_DESTROY (sh_frame);
+ }
return 0;
}
@@ -1649,19 +2453,360 @@ void
afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
size_t size)
{
+ /*
+ * Build in @str (capacity @size) a space-prefixed list of the
+ * self-heal types enabled in @self_heal_p, in the order: meta-data,
+ * data, entry, missing-entry, gfid.
+ *
+ * NOTE(review): only the meta-data case writes from the start of @str;
+ * every other case appends at strlen (str), so @str presumably has to
+ * be NUL-initialised by the caller -- confirm against all callers.
+ */
- GF_ASSERT (str && (size > 0));
+ GF_ASSERT (str && (size > strlen (" missing-entry gfid "
+ "meta-data data entry")));
- if (self_heal_p->need_metadata_self_heal) {
- snprintf(str, size, " meta-data");
+ if (self_heal_p->do_metadata_self_heal) {
+ snprintf (str, size, " meta-data");
}
- if (self_heal_p->need_data_self_heal) {
- snprintf(str + strlen(str), size - strlen(str),
- " data");
+ if (self_heal_p->do_data_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " data");
}
- if (self_heal_p->need_entry_self_heal) {
- snprintf(str + strlen(str), size - strlen(str),
- " entry");
+ if (self_heal_p->do_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " entry");
}
+
+ if (self_heal_p->do_missing_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str),
+ " missing-entry");
+ }
+
+ if (self_heal_p->do_gfid_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " gfid");
+ }
+}
+
+/*
+ * Map a transaction type to the self-heal type that repairs it.
+ * Data, metadata and entry transactions map to their namesake self-heal
+ * types.  A rename transaction trips GF_ASSERT (0) and, like any other
+ * value, yields AFR_SELF_HEAL_INVALID.
+ */
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type)
+{
+        if (type == AFR_DATA_TRANSACTION)
+                return AFR_SELF_HEAL_DATA;
+
+        if (type == AFR_METADATA_TRANSACTION)
+                return AFR_SELF_HEAL_METADATA;
+
+        if (type == AFR_ENTRY_TRANSACTION)
+                return AFR_SELF_HEAL_ENTRY;
+
+        if (type == AFR_ENTRY_RENAME_TRANSACTION)
+                GF_ASSERT (0);
+
+        return AFR_SELF_HEAL_INVALID;
+}
+
+/*
+ * Fill @child with the loc of the entry @name inside directory @parent.
+ *
+ * The parent gfid is taken from parent->inode->gfid or, when that is null,
+ * from parent->gfid; if both are null the call fails.  child->path is
+ * built as "<parent path>/<name>" ("/<name>" when the parent is "/"),
+ * child->name points at the last path component, child->parent takes a
+ * reference on parent->inode, child->inode is a new inode from the parent's
+ * inode table and child->pargfid is set to the parent gfid.
+ *
+ * Returns 0 on success.  On failure returns -1 and, when @child is
+ * non-NULL, wipes it with loc_wipe().
+ */
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+        int     ret = -1;
+        uuid_t  pargfid = {0};
+
+        if (!child)
+                goto out;
+
+        if (!uuid_is_null (parent->inode->gfid))
+                uuid_copy (pargfid, parent->inode->gfid);
+        else if (!uuid_is_null (parent->gfid))
+                uuid_copy (pargfid, parent->gfid);
+
+        if (uuid_is_null (pargfid))
+                goto out;
+
+        if (strcmp (parent->path, "/") == 0)
+                ret = gf_asprintf ((char **)&child->path, "/%s", name);
+        else
+                ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+                                   name);
+
+        if (-1 == ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "asprintf failed while setting child path");
+                /* Without a path the strrchr() below would be handed an
+                 * unusable pointer and the function could still report
+                 * success, so stop here. */
+                goto out;
+        }
+
+        child->name = strrchr (child->path, '/');
+        if (child->name)
+                child->name++;
+
+        child->parent = inode_ref (parent->inode);
+        child->inode = inode_new (parent->inode->table);
+        uuid_copy (child->pargfid, pargfid);
+
+        if (!child->inode) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if ((ret == -1) && child)
+                loc_wipe (child);
+
+        return ret;
+}
+
+/*
+ * Send [f]xattrop requests that apply the delta matrix for transaction
+ * @type to every child that has a non-NULL sh->xattr entry.
+ *
+ * @cbk is the callback passed to each wound [f]xattrop (the child index is
+ * the cookie).  @finish is called directly when there is nothing to wind
+ * (call_count == 0) or when setup fails; in the failure case the self-heal
+ * status is first set to AFR_SELF_HEAL_FAILED.  Always returns 0.
+ */
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t **erase_xattr = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
+ sh->success, priv->child_count, type);
+
+ erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!erase_xattr)
+ goto out;
+
+ /* One request dict, and one expected reply, per child with an xattr. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ call_count++;
+ erase_xattr[i] = dict_new ();
+ if (!erase_xattr[i])
+ goto out;
+ }
+ }
+
+ afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
+ priv->child_count, type);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ local->call_count = call_count;
+ if (call_count == 0) {
+ ret = 0;
+ finish (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!erase_xattr[i])
+ continue;
+
+ if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ }
+ }
+
+ ret = 0;
+out:
+ /* Drop this function's references on the request dicts and free the
+ * array on every path. */
+ if (erase_xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (erase_xattr[i]) {
+ dict_unref (erase_xattr[i]);
+ }
+ }
+ }
+
+ GF_FREE (erase_xattr);
+
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ finish (frame, this);
+ }
+
+ return 0;
+}
+
+/*
+ * Record @status for the self-heal phase currently in action
+ * (sh->sh_type_in_action) in sh->afr_all_sh_status.
+ *
+ * A NULL @sh, or a type in action of AFR_SELF_HEAL_INVALID, is logged as an
+ * error (attributed to the calling function) and nothing is recorded.
+ */
+void
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
+{
+        xlator_t                   *this      = NULL;
+        afr_sh_status_for_all_type *sh_status = NULL;
+
+        this = THIS;
+
+        /* Validate sh before reading any of its members. */
+        if (!sh) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+                                  "structure");
+                goto out;
+        }
+
+        sh_status = &sh->afr_all_sh_status;
+
+        switch (sh->sh_type_in_action) {
+        case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+                sh_status->gfid_or_missing_entry_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_METADATA:
+                sh_status->metadata_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_DATA:
+                sh_status->data_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_ENTRY:
+                sh_status->entry_self_heal = status;
+                break;
+        case AFR_SELF_HEAL_INVALID:
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid "
+                                  "self heal type in action");
+                break;
+        }
+out:
+        return;
+}
+
+/*
+ * Flag @local as unhealable and mark the self-heal phase currently in
+ * action as failed.
+ */
+void
+afr_set_local_for_unhealable (afr_local_t *local)
+{
+        local->unhealable = 1;
+        afr_set_self_heal_status (&local->self_heal, AFR_SELF_HEAL_FAILED);
+}
+
+/*
+ * Report whether self-heal has failed.
+ *
+ * With AFR_CHECK_ALL the result is 1 if any of the gfid/missing-entry,
+ * metadata, data or entry phases is AFR_SELF_HEAL_FAILED.  With
+ * AFR_CHECK_SPECIFIC only the phase named by sh->sh_type_in_action is
+ * examined (AFR_SELF_HEAL_INVALID counts as not attempted).  Any other
+ * @type yields 0.  A NULL @sh is logged and reported as failed (1).
+ */
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
+{
+        afr_sh_status_for_all_type  *sh_status = NULL;
+        afr_self_heal_status         status    = AFR_SELF_HEAL_FAILED;
+        xlator_t                    *this      = NULL;
+        int                          sh_failed = 0;
+
+        this = THIS;
+
+        /* Validate sh before reading any of its members. */
+        if (!sh) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+                                  "structure");
+                sh_failed = 1;
+                goto out;
+        }
+
+        sh_status = &sh->afr_all_sh_status;
+
+        if (type == AFR_CHECK_ALL) {
+                if ((sh_status->gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->metadata_self_heal == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->data_self_heal == AFR_SELF_HEAL_FAILED)
+                    || (sh_status->entry_self_heal == AFR_SELF_HEAL_FAILED))
+                        sh_failed = 1;
+        } else if (type == AFR_CHECK_SPECIFIC) {
+                switch (sh->sh_type_in_action) {
+                case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+                        status = sh_status->gfid_or_missing_entry_self_heal;
+                        break;
+                case AFR_SELF_HEAL_METADATA:
+                        status = sh_status->metadata_self_heal;
+                        break;
+                case AFR_SELF_HEAL_ENTRY:
+                        status = sh_status->entry_self_heal;
+                        break;
+                case AFR_SELF_HEAL_DATA:
+                        status = sh_status->data_self_heal;
+                        break;
+                case AFR_SELF_HEAL_INVALID:
+                        status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+                        break;
+                }
+                if (status == AFR_SELF_HEAL_FAILED)
+                        sh_failed = 1;
+
+        }
+
+out:
+        return sh_failed;
+}
+
+/*
+ * Return the log phrase that describes @status.  Values outside the four
+ * handled states yield " has unknown status".  The returned string is a
+ * literal and must not be modified or freed.
+ */
+char *
+get_sh_completion_status (afr_self_heal_status status)
+{
+        switch (status)
+        {
+        case AFR_SELF_HEAL_NOT_ATTEMPTED:
+                return " is not attempted";
+        case AFR_SELF_HEAL_FAILED:
+                return " failed";
+        case AFR_SELF_HEAL_STARTED:
+                return " is started";
+        case AFR_SELF_HEAL_SYNC_BEGIN:
+                return " is successfully completed";
+        }
+
+        return " has unknown status";
+}
+
+/*
+ * Log, at level @loglvl, one line summarising the status of every
+ * self-heal phase recorded in local->self_heal, followed by the data and
+ * metadata self-heal info strings when those phases reached
+ * AFR_SELF_HEAL_SYNC_BEGIN and the strings are non-empty.  Nothing is
+ * logged when the ADD_FMT_STRING* macros leave print_log at 0.
+ */
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl)
+{
+
+        char                        sh_log[4096] = {0};
+        afr_self_heal_t            *sh           = &local->self_heal;
+        afr_sh_status_for_all_type  all_status   = sh->afr_all_sh_status;
+        xlator_t                   *this         = NULL;
+        size_t                      off          = 0;
+        int                         data_sh      = 0;
+        int                         metadata_sh  = 0;
+        int                         print_log    = 0;
+
+        this = THIS;
+
+        ADD_FMT_STRING (sh_log, off, "gfid or missing entry",
+                        all_status.gfid_or_missing_entry_self_heal, print_log);
+        ADD_FMT_STRING_SYNC (sh_log, off, "metadata",
+                             all_status.metadata_self_heal, print_log);
+        if (sh->background) {
+                ADD_FMT_STRING_SYNC (sh_log, off, "background data",
+                                     all_status.data_self_heal, print_log);
+        } else {
+                ADD_FMT_STRING_SYNC (sh_log, off, "foreground data",
+                                     all_status.data_self_heal, print_log);
+        }
+        ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal,
+                             print_log);
+
+        /* Test the info pointers for NULL before handing them to strcmp. */
+        if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal &&
+            sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+                data_sh = 1;
+        if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal &&
+            sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+                metadata_sh = 1;
+
+        if (!print_log)
+                return;
+
+        gf_log (this->name, loglvl, "%s %s %s on %s", sh_log,
+                ((data_sh == 1) ? sh->data_sh_info : ""),
+                ((metadata_sh == 1) ? sh->metadata_sh_info : ""),
+                local->loc.path);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 6431feaff..473264776 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -1,73 +1,144 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_COMMON_H__
#define __AFR_SELF_HEAL_COMMON_H__
#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
+#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
-} afr_self_heal_type;
+ AFR_LOOKUP_FAIL_CONFLICTS = 1,
+ AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
+} afr_lookup_flags_t;
int
afr_sh_select_source (int sources[], int child_count);
int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
afr_sh_source_count (int sources[], int child_count);
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count);
-
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc);
+
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count);
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type);
int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type);
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
-
void
afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
size_t size);
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type);
+
+int
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc);
+
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
+ int32_t flags, dict_t *xdata);
+int
+afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf);
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk);
+int
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk);
+afr_local_t *
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler);
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
+typedef int
+(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame);
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this);
+
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count);
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs);
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this));
+
+void
+afr_set_local_for_unhealable (afr_local_t *local);
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
+
+void
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
+
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index ca7dd92d8..9de26ee56 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -49,35 +40,35 @@
#include "afr-self-heal-common.h"
#include "afr-self-heal-algorithm.h"
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
+
+/*
+ * Tell whether data self-heal may go ahead: true once at least
+ * AFR_SH_MIN_PARTICIPANTS children succeeded.
+ */
+static inline gf_boolean_t
+afr_sh_data_proceed (unsigned int success_count)
+{
+        if (success_count < AFR_SH_MIN_PARTICIPANTS)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+extern int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
int
afr_sh_data_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- /* unref only if we created the fd ourselves */
-
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
-/* for (i = 0; i < priv->child_count; i++) */
-/* sh->locked_nodes[i] = 0; */
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
sh->completion_cbk (frame, this);
@@ -87,13 +78,12 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
-
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
@@ -101,8 +91,8 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "flush or setattr failed on %s on subvolume %s: %s",
+ gf_log (this->name, GF_LOG_ERROR,
+ "flush failed on %s on subvolume %s: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
}
@@ -118,87 +108,37 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre, struct iatt *statpost)
-{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
int
afr_sh_data_close (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
- int source = 0;
- int32_t valid = 0;
-
- struct iatt stbuf = {0,};
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- if (sh->healing_fd_opened) {
- /* not our job to close the fd */
-
- afr_sh_data_done (frame, this);
- return 0;
- }
-
if (!sh->healing_fd) {
+ //This happens when file is non-reg
afr_sh_data_done (frame, this);
return 0;
}
-
- call_count = (sh->active_sinks + 1) * 2;
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
local->call_count = call_count;
- /* closed source */
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[sh->source]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->flush,
- sh->healing_fd);
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
return 0;
+ }
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (!sh->success[i])
continue;
-
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"closing fd of %s on %s",
local->loc.path, priv->children[i]->name);
@@ -206,15 +146,7 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- sh->healing_fd);
-
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ sh->healing_fd, NULL);
if (!--call_count)
break;
@@ -223,29 +155,46 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
return 0;
}
+/*
+ * Drop the lock taken in the self-heal domain (priv->sh_domain) when
+ * sh->sh_dom_lock_held is set, continuing with afr_sh_data_close() once the
+ * unlock is done. When no such lock is held afr_sh_data_close() is called
+ * directly. Always returns 0.
+ */
+int
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_private_t   *priv  = this->private;
+        afr_self_heal_t *sh    = &local->self_heal;
+
+        if (!sh->sh_dom_lock_held) {
+                /* nothing to release */
+                afr_sh_data_close (frame, this);
+                return 0;
+        }
+
+        /* afr_sh_data_close is handed over as the unlock callback */
+        afr_sh_data_unlock (frame, this, priv->sh_domain, afr_sh_data_close);
+        return 0;
+}
int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
+ priv = this->private;
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
}
}
UNLOCK (&frame->lock);
@@ -253,168 +202,455 @@ afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_close (frame, this);
+ afr_sh_data_finish (frame, this);
+ }
+
+ return 0;
+}
+
+/*
+ * Wind a setattr (atime and mtime only) carrying the values in stbuf to
+ * every child marked in sh->success[]. Replies are collected by
+ * afr_sh_data_setattr_cbk. If no child is marked -- not expected, hence the
+ * assert -- the self-heal is finished right away. Always returns 0.
+ */
+int
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
+{
+        afr_local_t     *local      = frame->local;
+        afr_private_t   *priv       = this->private;
+        afr_self_heal_t *sh         = &local->self_heal;
+        int32_t          valid      = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
+        int              wind_count = 0;
+        int              i          = 0;
+
+        wind_count = afr_set_elem_count_get (sh->success, priv->child_count);
+        local->call_count = wind_count;
+
+        if (wind_count == 0) {
+                GF_ASSERT (0);
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->success[i]) {
+                        STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->setattr,
+                                           &local->loc, stbuf, valid, NULL);
+
+                        /* leave the loop once the last expected call is out */
+                        wind_count--;
+                        if (wind_count == 0)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Reply handler for the fstat sent by afr_sh_set_timestamps(); the cookie
+ * is the child index and is asserted to be sh->source.
+ * On success the fresh iatt is stored in sh->buf[] and passed on to
+ * afr_sh_data_setattr(); on failure (op_ret == -1) the error is logged and
+ * the self-heal is failed. Always returns 0.
+ */
+int
+afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret, int32_t op_errno,
+                               struct iatt *buf, dict_t *xdata)
+{
+        afr_local_t     *local       = frame->local;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child_index = (long) cookie;
+
+        GF_ASSERT (sh->source == child_index);
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+                        "time-stamps after self-heal", local->loc.path);
+                afr_sh_data_fail (frame, this);
+        } else {
+                sh->buf[child_index] = *buf;
+                afr_sh_data_setattr (frame, this, buf);
+        }
return 0;
}
+/*
+ * Writes that arrive after the self-heal was triggered make the iatt kept
+ * in local->self_heal.buf[] stale. So the source is stat-ed once more here
+ * and the [am]times are set from that reply, which is handled in
+ * afr_sh_data_setattr_fstat_cbk. Always returns 0.
+ */
+int
+afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_private_t   *priv  = this->private;
+        afr_self_heal_t *sh    = &local->self_heal;
+
+        /* the cookie carries the source index back to the callback */
+        STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
+                           (void *) (long) sh->source,
+                           priv->children[sh->source],
+                           priv->children[sh->source]->fops->fstat,
+                           sh->healing_fd, NULL);
+        return 0;
+}
+/*
+ * Release the lock held in domain 'dom' and continue with lock_cbk.
+ * int_lock->lock_cbk serves as the completion hook for both lock and
+ * unlock, so it is overwritten here.
+ *
+ * 'dom' must be this->name (clears sh->data_lock_held) or priv->sh_domain
+ * (clears sh->sh_dom_lock_held). For any other domain nothing is unlocked:
+ * lock_op_ret is set to -1 and lock_cbk is called directly.
+ * Always returns 0.
+ */
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
sh = &local->self_heal;
+ priv = this->private;
- GF_ASSERT (!sh->data_lock_held);
-
- int_lock->lock_cbk = afr_sh_data_close;
+ /* store the callback before the domain check, so that the error path
+ * under 'out' runs the caller's lock_cbk and not a stale pointer */
+ int_lock->lock_cbk = lock_cbk;
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else if (strcmp (dom, priv->sh_domain) == 0) {
+ sh->sh_dom_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ int_lock->domain = dom;
afr_unlock (frame, this);
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
return 0;
}
-
int
afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
sh = &local->self_heal;
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"finishing data selfheal of %s", local->loc.path);
- if (!sh->data_lock_held)
- afr_sh_data_unlock (frame, this);
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
else
- afr_sh_data_close (frame, this);
+ afr_sh_dom_unlock (frame, this);
return 0;
}
+/*
+ * Failure exit of data self-heal: log, record AFR_SELF_HEAL_FAILED in the
+ * self-heal state and then run the usual afr_sh_data_finish() path.
+ * Always returns 0.
+ */
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_self_heal_t *sh    = &local->self_heal;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "finishing failed data selfheal of %s", local->loc.path);
+
+        afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+        afr_sh_data_finish (frame, this);
+        return 0;
+}
int
afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child_index = (long) cookie;
- call_count = afr_frame_return (frame);
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
+ "log failed on %s for subvol %s, reason: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
- if (call_count == 0)
- afr_sh_data_finish (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ if (!IA_ISREG (sh->type)) {
+ afr_sh_data_finish (frame, this);
+ goto out;
+ }
+ GF_ASSERT (sh->old_loop_frame);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_post_sh_big_lock_success,
+ afr_post_sh_big_lock_failure);
+ }
+out:
return 0;
}
-
int
afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+ return 0;
+}
+int
+afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
- sh = &local->self_heal;
priv = this->private;
+ sh = &local->self_heal;
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to fsync on "
+ "%s - %s", local->loc.path,
+ priv->children[child_index]->name, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ }
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_erase_pending (frame, this);
+ }
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
+/*
+ * Before erasing xattrs, make sure the data is written to disk.
+ *
+ * Winds fsync on sh->healing_fd to every child that is marked in
+ * sh->success[] and is not a source; sh->active_sinks replies are expected
+ * in afr_sh_data_fsync_cbk. With no active sinks it goes straight to
+ * afr_sh_data_erase_pending(). Always returns 0.
+ */
+int
+afr_sh_data_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ call_count = sh->active_sinks;
+ if (call_count == 0) {
+ afr_sh_data_erase_pending (frame, this);
+ return 0;
+ }
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
+ if (!sh->success[i] || sh->sources[i])
continue;
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
+ STACK_WIND_COOKIE (frame, afr_sh_data_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync,
+ sh->healing_fd, 1, NULL);
+ /* stop after the last expected wind, as the other wind loops in
+ * this file do, so sh is not read again once all calls are out */
+ if (!--call_count)
+ break;
+ }
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
+ return 0;
+}
+
+/*
+ * Look up a self-heal algorithm by name in afr_self_heal_algorithms[],
+ * scanning until the entry whose name is NULL.
+ * Returns the matching entry, or NULL when name is NULL or nothing matches.
+ * The 'this' argument is not used.
+ */
+static struct afr_sh_algorithm *
+sh_algo_from_name (xlator_t *this, char *name)
+{
+        int idx = 0;
+
+        if (!name)
+                return NULL;
+
+        for (idx = 0; afr_self_heal_algorithms[idx].name; idx++) {
+                if (strcmp (name, afr_self_heal_algorithms[idx].name) == 0)
+                        return &afr_self_heal_algorithms[idx];
+        }
+
+        return NULL;
+}
+
+
+static int
+sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+ for (i = 0; i < child_count; i++) {
+ if (!local->child_up[i] || sh->child_errno[i])
+ continue;
+ if (sh->buf[i].ia_size == 0) {
+ ret = 1;
break;
+ }
}
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
+ return ret;
+}
+
+
+struct afr_sh_algorithm *
+afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ struct afr_sh_algorithm * algo = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+
+ if (algo == NULL) {
+ /* option not set, so fall back on heuristics */
+
+ if (sh_zero_byte_files_exist (local, priv->child_count)
+ || (sh->file_size <= (priv->data_self_heal_window_size *
+ this->ctx->page_size))) {
+
+ /*
+ * If the file does not exist on one of the subvolumes,
+ * or a zero-byte file exists (created by entry self-heal)
+ * the entire content has to be copied anyway, so there
+ * is no benefit from using the "diff" algorithm.
+ *
+ * If the file size is about the same as page size,
+ * the entire file can be read and written with a few
+ * (pipelined) STACK_WINDs, which will be faster
+ * than "diff" which has to read checksums and then
+ * read and write.
+ */
+
+ algo = sh_algo_from_name (this, "full");
+
+ } else {
+ algo = sh_algo_from_name (this, "diff");
}
}
- GF_FREE (erase_xattr);
- return 0;
+ return algo;
}
+/*
+ * Install the completion (afr_sh_data_fsync) and abort (afr_sh_data_fail)
+ * callbacks, pick the sync algorithm with afr_sh_data_pick_algo() and
+ * start it. If no algorithm could be picked the self-heal is failed
+ * instead of dereferencing a NULL pointer. Always returns 0.
+ */
int
+afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t             *local   = NULL;
+        afr_self_heal_t         *sh      = NULL;
+        struct afr_sh_algorithm *sh_algo = NULL;
+
+        local = frame->local;
+        sh = &local->self_heal;
+
+        sh->algo_completion_cbk = afr_sh_data_fsync;
+        sh->algo_abort_cbk = afr_sh_data_fail;
+
+        sh_algo = afr_sh_data_pick_algo (frame, this);
+        if (!sh_algo) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: no data self-heal algorithm available",
+                        local->loc.path);
+                afr_sh_data_fail (frame, this);
+                return 0;
+        }
+
+        sh->algo = sh_algo;
+        sh_algo->fn (frame, this);
+
+        return 0;
+}
+
+int
afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
int call_count = 0;
int child_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
+ priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_DEBUG,
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
"ftruncate of %s on subvolume %s failed (%s)",
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
"ftruncate of %s on subvolume %s completed",
local->loc.path,
priv->children[child_index]->name);
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_sync_prepare (frame, this);
}
return 0;
@@ -449,7 +685,8 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ sh->healing_fd, sh->file_size,
+ NULL);
if (!--call_count)
break;
@@ -458,177 +695,311 @@ afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
return 0;
}
+int
+afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
+ priv = this->private;
+ sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (sh->source < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == sh->source || sh->child_errno[i])
+ continue;
+
+ if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
+ sh->sources[i] = 0;
+ }
+
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+out:
+ return ret;
+}
+
+char*
+afr_get_sizes_str (afr_local_t *local, struct iatt *bufs, xlator_t *this)
{
- int i = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sizes_str = NULL;
+ size_t off = 0;
+ char *fmt_str = "%llu bytes on %s, ";
+ char *child_down = " %s,";
+ char *child_unknown = " %s,";
+ int down_child_present = 0;
+ int down_count = 0;
+ int unknown_count = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is ";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ len += snprintf (num, sizeof (num), fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), child_down,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count ++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), child_unknown,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
}
- i++;
}
- return NULL;
-}
+ if (down_child_present) {
+ if (down_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
+ len++;//for '\0'
-static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
-{
- int i;
- int ret = 0;
+ sizes_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- for (i = 0; i < child_count; i++) {
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
+ if (!sizes_str)
+ return NULL;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ off += snprintf (sizes_str + off, len - off, fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
}
}
- return ret;
-}
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0) {
+ off += snprintf (sizes_str + off, len - off, child_down,
+ priv->children[i]->name);
+ }
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1) {
+ off += snprintf (sizes_str + off, len - off,
+ child_unknown,
+ priv->children[i]->name);
+
+ }
+ }
+
+ return sizes_str;
+}
+
+/*
+ * Build the " to sinks  <name>, <name>," fragment listing every child that
+ * is not a source (sh->sources[i] == 0) and is up (local->child_up[i] == 1).
+ * A first pass over a scratch buffer only measures the length, the second
+ * pass formats into a GF_CALLOC'd buffer of exactly that size.
+ * Returns the allocated string (ownership passes to the caller) or NULL if
+ * the allocation fails.
+ */
+char*
+afr_get_sinks_str (xlator_t *this, afr_local_t *local, afr_self_heal_t *sh)
{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sinks_str = NULL;
+ char *temp_str = " to sinks ";
+ char *str_format = " %s,";
+ /* write offset into sinks_str; size_t so that it cannot wrap the way
+ * a plain char would once the string grows past CHAR_MAX bytes */
+ size_t off = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+ priv = this->private;
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
+ len += snprintf (num, sizeof (num), "%s", temp_str);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), str_format,
+ priv->children[i]->name);
+ }
+ }
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size * this->ctx->page_size))) {
+ len ++;
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
+ sinks_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- algo = sh_algo_from_name (this, "full");
+ if (!sinks_str)
+ return NULL;
- } else {
- algo = sh_algo_from_name (this, "diff");
+ off += snprintf (sinks_str + off, len - off, "%s", temp_str);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ off += snprintf (sinks_str + off, len - off,
+ str_format,
+ priv->children[i]->name);
}
}
- return algo;
+ return sinks_str;
+
}
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+/*
+ * Compose the human readable summary of this data self-heal (source, sinks,
+ * per-child sizes and pending matrix) and store it in sh->data_sh_info.
+ * Each helper string that could not be built is replaced by "" so the
+ * summary is still produced; every helper string that was actually
+ * allocated is freed again before returning.
+ */
+void
+afr_set_data_sh_info_str (afr_local_t *local, afr_self_heal_t *sh, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ char *pending_matrix_str = NULL;
+ char *sizes_str = NULL;
+ char *sinks_str = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
- struct afr_sh_algorithm *sh_algo = NULL;
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ sizes_str = afr_get_sizes_str (local, sh->buf, this);
+ if (!sizes_str)
+ sizes_str = "";
+
+ sinks_str = afr_get_sinks_str (this, local, sh);
+ if (!sinks_str)
+ sinks_str = "";
+
+ gf_asprintf (&sh->data_sh_info, " data self heal from %s %s with "
+ "%s data %s", priv->children[sh->source]->name, sinks_str,
+ sizes_str, pending_matrix_str);
+
+ /* the "" placeholders are string literals and must not be freed */
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (sizes_str && strcmp (sizes_str, ""))
+ GF_FREE (sizes_str);
+
+ /* sinks_str is allocated by afr_get_sinks_str and was leaked before */
+ if (sinks_str && strcmp (sinks_str, ""))
+ GF_FREE (sinks_str);
+}
+
+void
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
+ int source = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
+ source = sh->source;
+ sh->block_size = this->ctx->page_size;
+ sh->file_size = sh->buf[source].ia_size;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
+ if (FILE_HAS_HOLES (&sh->buf[source]))
+ sh->file_has_holes = 1;
+
+ if (sh->background && sh->unwind && !sh->unwound) {
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
+ sh->unwound = _gf_true;
}
- sh->success[source] = 1;
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_INFO,
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_data_finish (frame, this);
- return 0;
+ return;
}
- sh->active_sinks = active_sinks;
gf_log (this->name, GF_LOG_DEBUG,
"self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
-
- sh->algo_completion_cbk = afr_sh_data_trim_sinks;
- sh->algo_abort_cbk = afr_sh_data_finish;
+ local->loc.path, priv->children[sh->source]->name,
+ sh->active_sinks);
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh_algo->fn (frame, this);
-
- return 0;
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_data_trim_sinks (frame, this);
}
-
int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_local_t * orig_local = NULL;
-
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int nsources = 0;
- int source = 0;
- int i = 0;
+ int ret = 0;
+ int *old_sources = NULL;
+ int tstamp_source = 0;
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_DATA);
-
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ if (sh->sync_done) {
+ //store sources before sync so that mtime can be set using the
+ //iatt buf from one of them.
+ old_sources = alloca (priv->child_count*sizeof (*old_sources));
+ memcpy (old_sources, sh->sources,
+ priv->child_count * sizeof (*old_sources));
}
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION, NULL, _gf_true);
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to resolve conflicting data of %s",
+ "Picking favorite child %s as authentic source to "
+ "resolve conflicting data of %s",
priv->children[priv->favorite_child]->name,
local->loc.path);
@@ -639,111 +1010,129 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
}
if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible split-brain). "
- "Please delete the file from all but the preferred "
- "subvolume.", local->loc.path);
-
- local->govinda_gOvinda = 1;
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB);
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
- if (source == -1) {
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"No active sources found.");
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- sh->source = source;
- sh->block_size = 65536;
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- orig_local = sh->orig_frame->local;
- orig_local->cont.lookup.buf.ia_size = sh->buf[source].ia_size;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- afr_set_read_child (this, local->loc.inode, sh->source);
-
- /*
- quick-read might have read the file, so send xattr from
- the source subvolume (http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=815)
- */
+ if (sh->sync_done) {
+ /* Perform setattr from one of the old_sources if possible
+ * Because only they have the correct mtime, the new sources
+ * (i.e. old sinks) have mtime from last writev in sync.
+ */
+ tstamp_source = sh->source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_sources[i] && sh->sources[i])
+ tstamp_source = i;
+ }
+ afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
+ } else {
+ afr_set_data_sh_info_str (local, sh, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
- dict_unref (orig_local->cont.lookup.xattr);
- if (orig_local->cont.lookup.xattrs)
- orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
- if (sh->background) {
- sh->unwind (sh->orig_frame, this);
- sh->unwound = _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
}
-
- afr_sh_data_sync_prepare (frame, this);
-
return 0;
}
-
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr)
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ int read_child = -1;
+ int32_t **pending_matrix = NULL;
+ int32_t *sources = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int32_t nsources = 0;
+ int32_t prev_read_child = -1;
+ int32_t config_read_child = -1;
+ int32_t subvol_status = 0;
- int source = 0;
- int i = 0;
-
- sh = &local->self_heal;
priv = this->private;
-
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ pending_matrix = local->cont.lookup.pending_matrix;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+
+ nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
+ local->loc.path);
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ local->cont.lookup.possible_spb = _gf_true;
+ nsources = 1;
+ sources[success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child (this,
+ success_children,
+ bufs);
+ sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ break;
+ }
}
-
- sh->sources = GF_CALLOC (priv->child_count, sizeof (*sh->sources),
- gf_afr_mt_int32_t);
-
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- (void)afr_sh_mark_sources (sh, priv->child_count, AFR_SELF_HEAL_DATA);
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- return source;
+ if (nsources < 0)
+ goto out;
+
+ prev_read_child = local->read_child_index;
+ config_read_child = priv->read_child;
+ read_child = afr_select_read_child_from_policy (success_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ sources,
+ priv->hash_mode, gfid);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
+ read_child);
+ return read_child;
}
-
int
afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
-
int call_count = -1;
int child_index = (long) cookie;
@@ -760,6 +1149,14 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
priv->children[child_index]->name);
sh->buf[child_index] = *buf;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
+ "on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
@@ -767,9 +1164,20 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_fix (frame, this);
+ /* Previous versions of glusterfs might have set
+ * the pending data xattrs which need to be erased
+ */
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
+ "succeeded on < %d children, aborting "
+ "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
+ local->loc.path);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fxattrop_fstat_done (frame, this);
}
-
+out:
return 0;
}
@@ -780,46 +1188,52 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
- int call_count = 0;
- int i = 0;
+ int call_count = 0;
+ int i = 0;
+ int child = 0;
+ int32_t *fstat_children = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
+ fstat_children = memdup (sh->success_children,
+ sizeof (*fstat_children) * priv->child_count);
+ if (!fstat_children) {
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ call_count = sh->success_count;
local->call_count = call_count;
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
+ child = fstat_children[i];
+ if (child == -1)
+ break;
+ STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
+ (void *) (long) child,
+ priv->children[child],
+ priv->children[child]->fops->fstat,
+ sh->healing_fd, NULL);
+ --call_count;
}
-
+ GF_ASSERT (!call_count);
+out:
+ GF_FREE (fstat_children);
return 0;
}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+void
+afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
@@ -835,16 +1249,46 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
priv->children[child_index]->name);
sh->xattr[child_index] = dict_ref (xattr);
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
+ "failed on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
}
}
UNLOCK (&frame->lock);
+}
- call_count = afr_frame_return (frame);
+int
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+ call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
+ "change log succeeded on < %d children",
+ local->loc.path, AFR_SH_MIN_PARTICIPANTS);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
afr_sh_data_fstat (frame, this);
}
-
+out:
return 0;
}
@@ -855,34 +1299,61 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
-
- int32_t zero_pending[3] = {0, 0, 0};
-
+ dict_t **xattr_req;
+ int32_t *zero_pending = NULL;
int call_count = 0;
int i = 0;
int ret = 0;
+ int j;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
local->call_count = call_count;
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin (xattr_req, priv->pending_key[i],
- zero_pending, 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- }
- }
-
+ xattr_req = GF_CALLOC(priv->child_count, sizeof(struct dict_t *),
+ gf_afr_mt_dict_t);
+ if (!xattr_req)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ xattr_req[i] = dict_new();
+ if (!xattr_req[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
+ gf_afr_mt_int32_t);
+ if (!zero_pending) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynptr (xattr_req[i], priv->pending_key[j],
+ zero_pending,
+ 3 * sizeof (*zero_pending));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value");
+ goto out;
+ } else {
+ zero_pending = NULL;
+ }
+ }
+ }
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
@@ -890,48 +1361,90 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->fxattrop,
sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
+ xattr_req[i], NULL);
if (!--call_count)
break;
}
}
- if (xattr_req)
- dict_unref (xattr_req);
+out:
+ if (xattr_req) {
+ for (i = 0; i < priv->child_count; i++)
+ if (xattr_req[i])
+ dict_unref(xattr_req[i]);
+ GF_FREE(xattr_req);
+ }
+
+ if (ret) {
+ GF_FREE (zero_pending);
+ afr_sh_data_fail (frame, this);
+ }
return 0;
}
+int
+afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this);
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_dom_lock_held = _gf_true;
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
+ return 0;
+}
int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking inodelks failed.");
- afr_sh_data_done (frame, this);
+ gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_failure_handler (frame, this);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking inodelks done. Proceeding to FOP");
- afr_sh_data_fxattrop (frame, this);
+ gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
+ "done for %s by %s. Proceding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_success_handler (frame, this);
}
return 0;
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -941,50 +1454,114 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
sh = &local->self_heal;
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ if (!sh->data_lock_block) {
+ sh->data_lock_failure_handler(frame, this);
+ } else {
+ int_lock->lock_cbk =
+ afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ }
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "done for %s by %s. Proceeding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ sh->data_lock_success_handler (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
+
afr_nonblocking_inodelk (frame, this);
+ return 0;
+}
+
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ sh->data_lock_held = _gf_true;
+ sh->sync_done = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
+
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_set_timestamps (frame, this);
return 0;
}
int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block,
+ char *dom, afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
afr_self_heal_t * sh = NULL;
-
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
- if (sh->data_lock_held) {
- /* caller has held the lock already,
- so skip locking */
-
- afr_sh_data_fxattrop (frame, this);
- return 0;
- }
-
- return afr_sh_data_lock_rec (frame, this);
+ sh->data_lock_success_handler = success_handler;
+ sh->data_lock_failure_handler = failure_handler;
+ sh->data_lock_block = block;
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
}
-
int
afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1005,26 +1582,26 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_ERROR,
"open of %s failed on child %s (%s)",
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "open of %s succeeded on child %s",
+ local->loc.path,
+ priv->children[child_index]->name);
}
-
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_data_fail (frame, this);
return 0;
}
@@ -1032,7 +1609,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"fd for %s opened, commencing sync",
local->loc.path);
- afr_sh_data_lock (frame, this);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+ afr_sh_dom_lock_success, afr_sh_data_fail);
}
return 0;
@@ -1044,9 +1622,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
{
int i = 0;
int call_count = 0;
-
fd_t *fd = NULL;
-
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
afr_self_heal_t *sh = NULL;
@@ -1055,14 +1631,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->healing_fd_opened) {
- /* caller has opened the fd for us already, so skip open */
-
- afr_sh_data_lock (frame, this);
- return 0;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
fd = fd_create (local->loc.inode, frame->root->pid);
@@ -1078,7 +1647,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->open,
&local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
+ O_RDWR|O_LARGEFILE, fd, NULL);
if (!--call_count)
break;
@@ -1087,20 +1656,93 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
return 0;
}
+void
+afr_sh_non_reg_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ if (op_ret < 0) {
+ afr_sh_data_fail (frame, this);
+ return;
+ }
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ if (1 == local->child_up[i])
+ sh->success[i] = 1;
+ }
+
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+}
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->data_lock_held = _gf_true;
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_non_reg_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ return 0;
+}
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
+
+int
+afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = this->private;
+ int ret = -1;
local = frame->local;
sh = &local->self_heal;
- if (sh->need_data_self_heal && priv->data_self_heal) {
- afr_sh_data_open (frame, this);
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
+
+ if (afr_can_start_data_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+ priv->sh_domain, priv->child_count);
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+
+ if (IA_ISREG (sh->type)) {
+ afr_sh_data_open (frame, this);
+ } else {
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
+ afr_sh_non_reg_lock_success,
+ afr_sh_data_fail);
+ }
} else {
gf_log (this->name, GF_LOG_TRACE,
"not doing data self heal on %s",
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a74b8477d..53491a1d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -49,35 +40,26 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
-int
-afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this);
+#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
+ do {\
+ _local = _frame->local;\
+ _sh = &_local->self_heal;\
+ _sh_frame = _sh->sh_frame;\
+ _sh_local = _sh_frame->local;\
+ _sh_sh = &_sh_local->self_heal;\
+ } while (0);
int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
+int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
-/* for (i = 0; i < priv->child_count; i++) { */
-/* sh->locked_nodes[i] = 0; */
-/* } */
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
sh->completion_cbk (frame, this);
@@ -120,20 +102,42 @@ afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
+ long i = 0;
int call_count = 0;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_local_t *orig_local = NULL;
call_frame_t *orig_frame = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+
+
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to erase pending xattrs on %s (%s)",
+ local->loc.path, priv->children[i]->name,
+ strerror (op_errno));
+ }
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local = frame->local;
- sh = &local->self_heal;
-
+ if (sh->source == -1) {
+ //this happens if the forced merge option is set
+ read_child = sh->fresh_children[0];
+ } else {
+ read_child = sh->source;
+ }
+ afr_inode_set_read_ctx (this, sh->inode, read_child,
+ sh->fresh_children);
orig_frame = sh->orig_frame;
orig_local = orig_frame->local;
@@ -153,67 +157,20 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
+ if (sh->entries_skipped) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ goto out;
}
- GF_FREE (erase_xattr);
-
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
+ afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
+ afr_sh_entry_erase_pending_cbk,
+ afr_sh_entry_finish);
+ return 0;
+out:
+ afr_sh_entry_finish (frame, this);
return 0;
}
@@ -294,61 +251,11 @@ next_active_sink (call_frame_t *frame, xlator_t *this,
return next_active_sink;
}
-
-int
-build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
-
- if (!child) {
- goto out;
- }
-
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
-
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
-
- if (!child->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
-
- if (!child->inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ret = 0;
-out:
- if (ret == -1)
- loc_wipe (child);
-
- return ret;
-}
-
-
int
afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
@@ -359,7 +266,8 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
int
afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int active_src, int32_t op_ret,
+ int32_t op_errno)
{
int call_count = 0;
@@ -375,29 +283,33 @@ int
afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
afr_self_heal_t *expunge_sh = NULL;
call_frame_t *frame = NULL;
-
- int active_src = (long) cookie;
+ int active_src = (long) cookie;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
priv = this->private;
expunge_local = expunge_frame->local;
expunge_sh = &expunge_local->self_heal;
frame = expunge_sh->sh_frame;
+ local = frame->local;
+ sh = &local->self_heal;
if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_ERROR,
"setattr on parent directory of %s on subvolume %s failed: %s",
expunge_local->loc.path,
priv->children[active_src]->name, strerror (op_errno));
}
AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
return 0;
}
@@ -408,20 +320,17 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
afr_self_heal_t *expunge_sh = NULL;
int active_src = 0;
- call_frame_t *frame = NULL;
-
- int32_t valid = 0;
+ int32_t valid = 0;
priv = this->private;
expunge_local = expunge_frame->local;
expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
active_src = (long) cookie;
@@ -431,7 +340,7 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
expunge_local->loc.path,
priv->children[active_src]->name);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"removing %s on %s failed (%s)",
expunge_local->loc.path,
priv->children[active_src]->name,
@@ -439,7 +348,6 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
}
valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
(void *) (long) active_src,
@@ -447,7 +355,7 @@ afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
priv->children[active_src]->fops->setattr,
&expunge_sh->parent_loc,
&expunge_sh->parentbuf,
- valid);
+ valid, NULL);
return 0;
}
@@ -471,7 +379,7 @@ afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
+ &expunge_local->loc, 0, NULL);
return 0;
}
@@ -496,7 +404,7 @@ afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1);
+ &expunge_local->loc, 1, NULL);
return 0;
}
@@ -504,22 +412,29 @@ afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf)
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf)
{
afr_private_t *priv = NULL;
afr_local_t *expunge_local = NULL;
afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
call_frame_t *frame = NULL;
int type = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ loc_t *loc = NULL;
priv = this->private;
expunge_local = expunge_frame->local;
expunge_sh = &expunge_local->self_heal;
frame = expunge_sh->sh_frame;
- source = expunge_sh->source;
+ local = frame->local;
+ sh = &local->self_heal;
+ loc = &expunge_local->loc;
type = buf->ia_type;
+ if (loc->parent && uuid_is_null (loc->parent->gfid))
+ uuid_copy (loc->pargfid, parentbuf->ia_gfid);
switch (type) {
case IA_IFSOCK:
@@ -537,7 +452,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
gf_log (this->name, GF_LOG_ERROR,
"%s has unknown file type on %s: 0%o",
expunge_local->loc.path,
- priv->children[source]->name, type);
+ priv->children[active_src]->name, type);
goto out;
break;
}
@@ -545,7 +460,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
return 0;
out:
AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ sh->expunge_done (frame, this, active_src, -1, EINVAL);
return 0;
}
@@ -563,15 +478,19 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
afr_self_heal_t *expunge_sh = NULL;
call_frame_t *frame = NULL;
int active_src = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
priv = this->private;
expunge_local = expunge_frame->local;
expunge_sh = &expunge_local->self_heal;
frame = expunge_sh->sh_frame;
active_src = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_ERROR,
"lookup of %s on %s failed (%s)",
expunge_local->loc.path,
priv->children[active_src]->name,
@@ -579,12 +498,13 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
goto out;
}
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
+ afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
+ postparent);
return 0;
out:
AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
return 0;
}
@@ -608,12 +528,11 @@ afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
(void *) (long) active_src,
priv->children[active_src],
priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
return 0;
}
-
int
afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
xlator_t *this,
@@ -628,7 +547,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
call_frame_t *frame = NULL;
int active_src = 0;
int need_expunge = 0;
-
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
priv = this->private;
expunge_local = expunge_frame->local;
@@ -636,9 +556,13 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
frame = expunge_sh->sh_frame;
active_src = expunge_sh->active_source;
source = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
if (op_ret == -1 && op_errno == ENOENT)
need_expunge = 1;
+ else if (op_ret == -1)
+ goto out;
if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
!uuid_is_null (buf->ia_gfid) &&
@@ -655,8 +579,9 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
}
if (need_expunge) {
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
+ gf_log (this->name, GF_LOG_INFO,
+ "Entry %s is missing on %s and deleting from "
+ "replica's other bricks",
expunge_local->loc.path,
priv->children[source]->name);
@@ -668,13 +593,14 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
return 0;
}
+out:
if (op_ret == 0) {
gf_log (this->name, GF_LOG_TRACE,
"%s exists under %s",
expunge_local->loc.path,
priv->children[source]->name);
} else {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_INFO,
"looking up %s under %s failed (%s)",
expunge_local->loc.path,
priv->children[source]->name,
@@ -682,11 +608,24 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
}
AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
return 0;
}
+static gf_boolean_t
+can_skip_entry_self_heal (char *name, loc_t *parent_loc)
+{
+ if (strcmp (name, ".") == 0) {
+ return _gf_true;
+ } else if (strcmp (name, "..") == 0) {
+ return _gf_true;
+ } else if (loc_is_root (parent_loc) &&
+ (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
int
afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
@@ -703,6 +642,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
int source = 0;
int op_errno = 0;
char *name = NULL;
+ int op_ret = -1;
priv = this->private;
local = frame->local;
@@ -710,42 +650,37 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
active_src = sh->active_source;
source = sh->source;
+ sh->expunge_done = afr_sh_entry_expunge_entry_done;
name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
+ if (can_skip_entry_self_heal (name, &local->loc)) {
+ op_ret = 0;
goto out;
}
gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
+ "inspecting existence of %s under %s",
name, local->loc.path);
expunge_frame = copy_frame (frame);
if (!expunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
expunge_frame->local = expunge_local;
expunge_sh = &expunge_local->self_heal;
expunge_sh->sh_frame = frame;
expunge_sh->active_source = active_src;
expunge_sh->entrybuf = entry->d_stat;
+ loc_copy (&expunge_sh->parent_loc, &local->loc);
-
- ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
+ ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
+ name);
if (ret != 0) {
+ op_errno = EINVAL;
goto out;
}
@@ -758,12 +693,12 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
+ &expunge_local->loc, NULL);
ret = 0;
out:
if (ret == -1)
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
return 0;
}
@@ -773,7 +708,7 @@ int
afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -791,7 +726,7 @@ afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"readdir of %s on subvolume %s failed (%s)",
local->loc.path,
priv->children[active_src]->name,
@@ -841,7 +776,7 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -862,7 +797,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
sh->offset = 0;
if (sh->source == -1) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"no active sources for %s to expunge entries",
local->loc.path);
goto out;
@@ -871,7 +806,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_sink (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
goto out;
}
@@ -896,257 +831,419 @@ out:
int
afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int32_t op_ret, int32_t op_errno)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ sh->entries_skipped = _gf_true;
call_count = afr_frame_return (frame);
-
if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
+void
+afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+}
int
afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
int call_count = 0;
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
int child_index = 0;
priv = this->private;
impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
child_index = (long) cookie;
if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"setattr done for %s on %s",
impunge_local->loc.path,
priv->children[child_index]->name);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"setattr (%s) on %s failed (%s)",
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
}
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
+ call_count = afr_frame_return (impunge_frame);
if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ 0, op_errno);
}
return 0;
}
+int
+afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *setattr_local = NULL;
+
+ setattr_local = setattr_frame->local;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr on parent directory (%s) failed: %s",
+ setattr_local->loc.path, strerror (op_errno));
+ }
+
+ call_count = afr_frame_return (setattr_frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (setattr_frame);
+ return 0;
+}
int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
+ afr_local_t *setattr_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *setattr_frame = NULL;
+ int32_t valid = 0;
+ int32_t op_errno = 0;
int child_index = 0;
-
- struct iatt stbuf;
- int32_t valid = 0;
+ int call_count = 0;
+ int i = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"setting ownership of %s on %s to %d/%d",
impunge_local->loc.path,
priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
+ impunge_sh->entrybuf.ia_uid,
+ impunge_sh->entrybuf.ia_gid);
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
+ setattr_frame = copy_frame (impunge_frame);
+ if (!setattr_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
+ setattr_local = setattr_frame->local;
+ call_count = afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0);
+ loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
+ impunge_local->call_count = call_count;
+ setattr_local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (impunge_sh->child_errno[i])
+ continue;
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (setattr_frame,
+ afr_sh_entry_impunge_parent_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &setattr_local->loc,
+ &impunge_sh->parentbuf, valid, NULL);
+
+ valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &impunge_local->loc,
+ &impunge_sh->entrybuf, valid, NULL);
+ call_count--;
+ }
+ GF_ASSERT (!call_count);
+ return 0;
+out:
+ if (setattr_frame)
+ AFR_STACK_DESTROY (setattr_frame);
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
+ return 0;
+}
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
+int
+afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ int child_index = 0;
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ priv = this->private;
+ impunge_local = impunge_frame->local;
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to perform xattrop on %s (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- loc_t *parent_loc = cookie;
+ int active_src = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr on parent directory failed: %s",
- strerror (op_errno));
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ afr_prepare_new_entry_pending_matrix (impunge_local->pending,
+ afr_is_errno_unset,
+ impunge_sh->child_errno,
+ &impunge_sh->entrybuf,
+ priv->child_count);
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto out;
}
- loc_wipe (parent_loc);
+ afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
+ LOCAL_LAST);
- GF_FREE (parent_loc);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->xattrop,
+ &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
- AFR_STACK_DESTROY (setattr_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- int pending_array[3] = {0, };
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = 0;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
child_index = (long) cookie;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ impunge_sh->child_errno[child_index] = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
"creation of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- goto out;
+ } else {
+ impunge_sh->child_errno[child_index] = 0;
}
- inode->ia_type = stbuf->ia_type;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ if (!afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0)) {
+ // new_file creation failed everywhere
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ goto out;
+ }
+ afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
+ }
+out:
+ return 0;
+}
- xattr = get_new_dict ();
- dict_ref (xattr);
+int
+afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
- ret = dict_set_static_bin (xattr, priv->pending_key[child_index],
- pending_array, sizeof (pending_array));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ // For symlinks, impunge is attempted unconditionally,
+ // so the file may already exist.
+ if ((op_ret < 0) && (op_errno == EEXIST))
+ op_ret = 0;
+ }
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
+ return 0;
+}
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
+int
+afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ loc_t *loc = NULL;
+ struct iatt *buf = NULL;
+ loc_t oldloc = {0};
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc = &impunge_local->loc;
+ buf = &impunge_sh->entrybuf;
- dict_unref (xattr);
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, buf->ia_gfid);
+ gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
+ loc->path, priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->link,
+ &oldloc, loc, NULL);
+ loc_wipe (&oldloc);
return 0;
+}
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
+int
+afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret < 0) {
+ afr_sh_entry_impunge_create_file (impunge_frame, this,
+ (long)cookie);
+ } else {
+ afr_sh_entry_impunge_hardlink (impunge_frame, this,
+ (long)cookie);
}
- UNLOCK (&impunge_frame->lock);
+ return 0;
+}
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
+int
+afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
+ xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ loc_t *loc = NULL;
+ dict_t *xattr_req = NULL;
+ loc_t oldloc = {0};
+ int ret = -1;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ loc = &impunge_local->loc;
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ goto out;
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, stbuf->ia_gfid);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lookup,
+ &oldloc, xattr_req);
+ ret = 0;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+ loc_wipe (&oldloc);
+ if (ret)
+ sh->impunge_done (frame, this, -1, ENOMEM);
return 0;
}
-
int
afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
-
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
priv = this->private;
impunge_local = impunge_frame->local;
@@ -1160,9 +1257,40 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
if (!dict)
gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "gfid set failed");
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
+
+ /*
+ * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY :
+ *
+ * Problem:
+ * While a brick is down in a replica pair, let's say the user creates
+ * one file (file-A) and a hard link to that file (h-file-A). After the
+ * brick comes back up, entry self-heal is attempted on the parent dir
+ * of these two files. As part of readdir in self-heal it reads both
+ * entries, file-A and h-file-A, and for each does a nameless lookup
+ * to check whether any hard links are already present on the
+ * destination brick. It finds that no hard links are present yet
+ * for file-A or h-file-A, so self-heal does a mknod for each of
+ * them. This leads to file-A and h-file-A no longer being hard
+ * links of each other.
+ *
+ * Fix: (More like shrinking of the race window; the race itself is
+ * still present in posix_mknod.)
+ * If mknod arrives with GLUSTERFS_INTERNAL_FOP_KEY set, then
+ * posix_mknod checks whether any gfid-links already exist and does
+ * link() instead of mknod. There can still be a race where two
+ * posix_mknods for the same gfid both see that the
+ * gfid-link file is not present, both proceed with mknod, and result
+ * in two different files with the same gfid.
+ */
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+ impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
(void *) (long) child_index,
@@ -1170,7 +1298,8 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
priv->children[child_index]->fops->mknod,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- stbuf->ia_rdev, dict);
+ makedev (ia_major (stbuf->ia_rdev),
+ ia_minor (stbuf->ia_rdev)), 0, dict);
if (dict)
dict_unref (dict);
@@ -1200,9 +1329,11 @@ afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
return 0;
}
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "gfid set failed");
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
gf_log (this->name, GF_LOG_DEBUG,
"creating missing directory %s on %s",
@@ -1215,7 +1346,7 @@ afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
priv->children[child_index]->fops->mkdir,
&impunge_local->loc,
st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- dict);
+ 0, dict);
if (dict)
dict_unref (dict);
@@ -1232,25 +1363,26 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
afr_local_t *impunge_local = NULL;
dict_t *dict = NULL;
struct iatt *buf = NULL;
-
- int ret = 0;
+ int ret = 0;
priv = this->private;
impunge_local = impunge_frame->local;
- buf = &impunge_local->cont.symlink.buf;
+ buf = &impunge_local->cont.dir_fop.buf;
dict = dict_new ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- afr_sh_entry_impunge_entry_done (impunge_frame, this, 0);
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, ENOMEM);
+ goto out;
}
+ GF_ASSERT (!uuid_is_null (buf->ia_gfid));
ret = afr_set_dict_gfid (dict, buf->ia_gfid);
if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict set gfid failed");
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: dict set gfid failed",
+ impunge_local->loc.path);
gf_log (this->name, GF_LOG_DEBUG,
"creating missing symlink %s -> %s on %s",
@@ -1261,11 +1393,11 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, dict);
+ linkname, &impunge_local->loc, 0, dict);
if (dict)
dict_unref (dict);
-
+out:
return 0;
}
@@ -1275,26 +1407,22 @@ afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
- int active_src = -1;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
child_index = (long) cookie;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"unlink of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[child_index]->name,
@@ -1313,10 +1441,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1341,7 +1468,7 @@ afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
+ &impunge_local->loc, 0, NULL);
return 0;
}
@@ -1351,26 +1478,24 @@ int
afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
int active_src = -1;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
active_src = impunge_sh->active_source;
child_index = (long) cookie;
if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"readlink of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[active_src]->name,
@@ -1409,10 +1534,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1436,7 +1560,7 @@ afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
@@ -1446,26 +1570,24 @@ int
afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
int child_index = -1;
- call_frame_t *frame = NULL;
int call_count = -1;
int active_src = -1;
priv = this->private;
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
active_src = impunge_sh->active_source;
child_index = (long) cookie;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"readlink of %s on %s failed (%s)",
impunge_local->loc.path,
priv->children[active_src]->name,
@@ -1485,10 +1607,9 @@ out:
}
UNLOCK (&impunge_frame->lock);
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
return 0;
}
@@ -1507,56 +1628,84 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
+ impunge_local->cont.dir_fop.buf = *stbuf;
STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
(void *) (long) child_index,
priv->children[active_src],
priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
+ &impunge_local->loc, 4096, NULL);
return 0;
}
-
int
-afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr,struct iatt *postparent)
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int type = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int call_count = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
+ afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
+ &impunge_sh->parentbuf);
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s (for %s) failed (%s)",
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_impunge_mkdir (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
impunge_local->loc.path,
- priv->children[active_src]->name,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
}
- impunge_sh->parentbuf = *postparent;
+ return 0;
+}
+
+int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
- impunge_local->cont.lookup.buf = *buf;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ buf = &impunge_sh->entrybuf;
type = buf->ia_type;
switch (type) {
@@ -1572,234 +1721,230 @@ afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
afr_sh_entry_impunge_readlink (impunge_frame, this,
child_index, buf);
break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
default:
gf_log (this->name, GF_LOG_ERROR,
"%s has unknown file type on %s: 0%o",
impunge_local->loc.path,
priv->children[active_src]->name, type);
- goto out;
+ sh->impunge_done (frame, this, -1, EINVAL);
break;
}
return 0;
+}
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+gf_boolean_t
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
+ unsigned int child_count)
+{
+ gf_boolean_t recreate = _gf_false;
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
+ GF_ASSERT (impunge_sh->child_errno);
+
+ if (child == impunge_sh->active_source)
+ goto out;
+
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ recreate = _gf_true;
+ goto out;
}
- return 0;
+ if (impunge_sh->child_errno[child] == ENOENT)
+ recreate = _gf_true;
+out:
+ return recreate;
}
+unsigned int
+afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
+ unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
+ count++;
+ }
+
+ return count;
+}
int
-afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
+ xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int recreate_count = 0;
+ int i = 0;
int active_src = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
active_src = impunge_sh->active_source;
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_recreate_lookup_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &impunge_local->loc, 0);
-
+ impunge_sh->entrybuf = impunge_sh->buf[active_src];
+ impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
+ recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
+ priv->child_count);
+ if (!recreate_count) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
+ goto out;
+ }
+ impunge_local->call_count = recreate_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
+ continue;
+ (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
+ recreate_count--;
+ }
+ GF_ASSERT (!recreate_count);
+out:
return 0;
}
-
-int
-afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+void
+afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_private_t *priv = NULL;
afr_local_t *impunge_local = NULL;
afr_self_heal_t *impunge_sh = NULL;
- int call_count = 0;
- int child_index = 0;
call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int gfid_miss_count = 0;
+ unsigned int children_up_count = 0;
+ uuid_t gfid = {0};
int active_src = 0;
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
- active_src = impunge_sh->active_source;
-
- if ((op_ret == -1 && op_errno == ENOENT)
- || (IA_ISLNK (impunge_sh->impunging_entry_mode))) {
-
- /*
- * A symlink's target might have changed, so
- * always go down the recreate path for them.
- */
-
- /* decrease call_count in recreate-callback */
-
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- afr_sh_entry_impunge_recreate (impunge_frame, this,
- child_index);
- return 0;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
- impunge_sh->parentbuf = *postparent;
+ if (op_ret < 0)
+ goto done;
+ if (impunge_sh->child_errno[active_src]) {
+ op_ret = -1;
+ op_errno = impunge_sh->child_errno[active_src];
+ goto done;
+ }
+
+ gfid_miss_count = afr_gfid_missing_count (this->name,
+ impunge_sh->success_children,
+ impunge_sh->buf, priv->child_count,
+ impunge_local->loc.path);
+ children_up_count = afr_up_children_count (impunge_local->child_up,
+ priv->child_count);
+ if ((gfid_miss_count == children_up_count) &&
+ (children_up_count < priv->child_count)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
+ "gfid should not be assigned in this state for %s",
+ impunge_local->loc.path);
+ goto done;
+ }
+
+ if (gfid_miss_count) {
+ afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
+ impunge_sh->success_children,
+ priv->child_count);
+ if (uuid_is_null (gfid)) {
+ sh->entries_skipped = _gf_true;
+ gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
+ "self-heal because of gfid absence",
+ impunge_local->loc.path);
+ goto done;
+ }
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, gfid,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
} else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
+ afr_sh_entry_call_impunge_recreate (impunge_frame, this);
}
-
- return 0;
+ return;
+done:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+ return;
}
-
int
afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
gf_dirent_t *entry)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
int ret = -1;
call_frame_t *impunge_frame = NULL;
afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
int active_src = 0;
- int i = 0;
- int call_count = 0;
int op_errno = 0;
+ int op_ret = -1;
- priv = this->private;
local = frame->local;
sh = &local->self_heal;
active_src = sh->active_source;
+ sh->impunge_done = afr_sh_entry_impunge_entry_done;
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
+ if (can_skip_entry_self_heal (entry->d_name, &local->loc)) {
+ op_ret = 0;
goto out;
}
gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
+ "inspecting existence of %s under %s",
entry->d_name, local->loc.path);
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ ret = afr_impunge_frame_create (frame, this, active_src,
+ &impunge_frame);
+ if (ret) {
+ op_errno = -ret;
goto out;
}
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- impunge_frame->local = impunge_local;
+ impunge_local = impunge_frame->local;
impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_src;
-
- impunge_sh->impunging_entry_mode =
- st_mode_from_ia (entry->d_stat.ia_prot, entry->d_stat.ia_type);
-
- ret = build_child_loc (this, &impunge_local->loc, &local->loc, entry->d_name);
+ ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
+ entry->d_name);
+ loc_copy (&impunge_sh->parent_loc, &local->loc);
if (ret != 0) {
+ op_errno = ENOMEM;
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
- call_count++;
- }
-
- impunge_local->call_count = call_count;
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", impunge_local->loc.path,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_entry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &impunge_local->loc, 0);
-
- if (!--call_count)
- break;
- }
-
- ret = 0;
+ op_ret = 0;
out:
- if (ret == -1)
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
+ if (ret) {
+ if (impunge_frame)
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+ }
return 0;
}
@@ -1809,7 +1954,7 @@ int
afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1827,11 +1972,12 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
if (op_ret <= 0) {
if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"readdir of %s on subvolume %s failed (%s)",
local->loc.path,
priv->children[active_src]->name,
strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
} else {
gf_log (this->name, GF_LOG_TRACE,
"readdir of %s on subvolume %s complete",
@@ -1848,7 +1994,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
entry_count++;
}
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"readdir'ed %d entries from %s",
entry_count, priv->children[active_src]->name);
@@ -1864,21 +2010,24 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
+ int32_t active_src = 0;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ active_src = sh->active_source;
+ gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
+ local->loc.path, sh->offset);
STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
priv->children[active_src],
priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
return 0;
}
@@ -1901,7 +2050,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
active_src = next_active_source (frame, this, sh->active_source);
sh->active_source = active_src;
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -1916,7 +2065,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
"impunging entries of %s on %s to other sinks",
local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ afr_sh_entry_impunge_subvol (frame, this);
return 0;
}
@@ -1924,7 +2073,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
int
afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1945,12 +2094,12 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_ERROR,
"opendir of %s failed on child %s (%s)",
local->loc.path,
priv->children[child_index]->name,
strerror (op_errno));
- sh->op_failed = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
}
UNLOCK (&frame->lock);
@@ -1958,7 +2107,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_entry_finish (frame, this);
return 0;
}
@@ -1996,7 +2145,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
source = local->self_heal.source;
sources = local->self_heal.sources;
- sh->block_size = 65536; //131072
+ sh->block_size = priv->sh_readdir_size;
sh->offset = 0;
call_count = sh->active_sinks;
@@ -2018,7 +2167,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) source,
priv->children[source],
priv->children[source]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
call_count--;
}
@@ -2035,7 +2184,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
if (!--call_count)
break;
@@ -2051,9 +2200,7 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -2061,182 +2208,108 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
+ afr_sh_mark_source_sinks (frame, this);
if (source != -1)
sh->success[source] = 1;
- if (active_sinks == 0) {
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_TRACE,
"no active sinks for self-heal on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- if (source == -1 && active_sinks < 2) {
+ if (source == -1 && sh->active_sinks < 2) {
gf_log (this->name, GF_LOG_TRACE,
"cannot sync with 0 sources and 1 sink on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
if (source != -1)
gf_log (this->name, GF_LOG_DEBUG,
"self-healing directory %s from subvolume %s to "
"%d other",
local->loc.path, priv->children[source]->name,
- active_sinks);
+ sh->active_sinks);
else
gf_log (this->name, GF_LOG_DEBUG,
"no active sources for %s found. "
"merging all entries as a conservative decision",
local->loc.path);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
afr_sh_entry_open (frame, this);
return 0;
}
-int
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
-
- int nsources = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_entry_finish (frame, this);
+ goto out;
+ }
+
if (sh->forced_merge) {
sh->source = -1;
goto heal;
}
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_ENTRY);
-
- if (nsources == 0) {
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
- return 0;
+ return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
}
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
sh->source = source;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ if (sh->source >= 0)
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+
heal:
afr_sh_entry_sync_prepare (frame, this);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_entry_fix (frame, this);
- }
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame,
- afr_sh_entry_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
+out:
+ return;
}
int
@@ -2244,61 +2317,48 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking entrylks failed.");
+ gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
+ "failed for %s.", local->loc.path);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_entry_done (frame, this);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking entrylks done. Proceeding to FOP");
- afr_sh_entry_lookup(frame, this);
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
+ "for %s. Proceeding to FOP", local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_entry_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
}
int
-afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
-
- int_lock->lk_basename = NULL;
- int_lock->lk_loc = &local->loc;
- int_lock->lock_cbk = afr_sh_post_nonblocking_entry_cbk;
-
- afr_nonblocking_entrylk (frame, this);
-
-
- return 0;
-}
-
-
-int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
afr_private_t *priv = NULL;
-
+ afr_self_heal_t *sh = NULL;
priv = this->private;
local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
- if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entry_lock (frame, this);
+ if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_sh_entrylk (frame, this, &local->loc, NULL,
+ afr_sh_post_nonblocking_entry_cbk);
} else {
gf_log (this->name, GF_LOG_TRACE,
"proceeding to completion on %s",
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 3f2e657a4..fd5da6cfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -54,73 +45,26 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-
-/* for (i = 0; i < priv->child_count; i++) { */
-/* sh->locked_nodes[i] = 1; */
-/* } */
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
+ afr_sh_reset (frame, this);
+ if (IA_ISDIR (sh->type)) {
gf_log (this->name, GF_LOG_DEBUG,
- "aborting selfheal of %s",
+ "proceeding to entry check on %s",
local->loc.path);
- sh->completion_cbk (frame, this);
+ afr_self_heal_entry (frame, this);
} else {
- if (IA_ISREG (sh->type)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
gf_log (this->name, GF_LOG_DEBUG,
- "completed self heal of %s",
+ "proceeding to data check on %s",
local->loc.path);
-
- sh->completion_cbk (frame, this);
+ afr_self_heal_data (frame, this);
}
return 0;
}
-
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
-
- return 0;
-}
-
int
afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -144,107 +88,68 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- if (call_count == 0)
- afr_sh_metadata_finish (frame, this);
+ local = frame->local;
+ sh = &local->self_heal;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_metadata_finish (frame, this);
return 0;
}
-
int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ long i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
+ i = (long)cookie;
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
+ call_count = afr_frame_return (frame);
if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
}
+ afr_sh_metadata_finish (frame, this);
}
- GF_FREE (erase_xattr);
return 0;
}
+int
+afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+ afr_sh_erase_pending (frame, this, AFR_METADATA_TRANSACTION,
+ afr_sh_metadata_erase_pending_cbk,
+ afr_sh_metadata_finish);
+ return 0;
+}
+
int
afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -262,7 +167,7 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"setting attributes failed for %s on %s (%s)",
local->loc.path,
priv->children[child_index]->name,
@@ -275,8 +180,13 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
- if (call_count == 0)
+ if (call_count == 0) {
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
afr_sh_metadata_erase_pending (frame, this);
+ }
return 0;
}
@@ -285,9 +195,9 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -295,13 +205,93 @@ afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
+int
+afr_sh_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
+
+ i = (long) cookie;
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc, local->xattr_req, 0, NULL);
+
+ out:
+ return 0;
+}
+
+inline void
+afr_prune_special_keys (dict_t *xattr_dict)
+{
+ dict_del (xattr_dict, GF_SELINUX_XATTR_KEY);
+}
+
+inline void
+afr_prune_pending_keys (dict_t *xattr_dict, afr_private_t *priv)
+{
+ int i = 0;
+
+ for (; i < priv->child_count; i++) {
+ dict_del (xattr_dict, priv->pending_key[i]);
+ }
+}
+
+int
+afr_sh_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
+
+ afr_prune_pending_keys (xattr, priv);
+
+ afr_prune_special_keys (xattr);
+
+ i = (long) cookie;
+
+ /* send removexattr in bulk via xdata */
+ STACK_WIND_COOKIE (frame, afr_sh_removexattr_cbk,
+ cookie,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc, "", xattr);
+
+ out:
+ return 0;
+}
int
afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
@@ -314,7 +304,7 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
int call_count = 0;
int i = 0;
- struct iatt stbuf;
+ struct iatt stbuf = {0,};
int32_t valid = 0;
local = frame->local;
@@ -327,9 +317,10 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
/*
* 2 calls per sink - setattr, setxattr
*/
- if (xattr)
+ if (xattr) {
call_count = active_sinks * 2;
- else
+ local->xattr_req = dict_ref (xattr);
+ } else
call_count = active_sinks;
local->call_count = call_count;
@@ -365,18 +356,18 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
+ &local->loc, &stbuf, valid, NULL);
call_count--;
if (!xattr)
continue;
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ STACK_WIND_COOKIE (frame, afr_sh_getxattr_cbk,
(void *) (long) i,
priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
+ priv->children[i]->fops->getxattr,
+ &local->loc, NULL, NULL);
call_count--;
}
@@ -385,17 +376,15 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
- int i;
-
local = frame->local;
sh = &local->self_heal;
priv = this->private;
@@ -410,16 +399,147 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
afr_sh_metadata_sync (frame, this, NULL);
} else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
+ afr_prune_pending_keys (xattr, priv);
afr_sh_metadata_sync (frame, this, xattr);
}
return 0;
}
+/*
+ * Append one formatted fragment at offset @off of @buf (capacity @size) and
+ * return the offset just past it.  When @buf is NULL, or @off already lies
+ * beyond the capacity, nothing is written and only the length is accounted
+ * for, so the very same call sequence can measure and then build a string.
+ * @fmt must contain exactly one "%s" conversion, which consumes @arg.
+ */
+static size_t
+afr_sh_info_append (char *buf, size_t size, size_t off, const char *fmt,
+                    const char *arg)
+{
+        char   *dst  = NULL;
+        size_t  room = 0;
+        int     n    = 0;
+
+        if (buf && (off < size)) {
+                dst  = buf + off;
+                room = size - off;
+        }
+
+        n = snprintf (dst, room, fmt, arg);
+
+        return (n > 0) ? (off + (size_t) n) : off;
+}
+
+/*
+ * Format the human readable description of a metadata self-heal into @buf
+ * and return the number of characters the full description needs (excluding
+ * the terminating NUL).  Call with (NULL, 0) to measure.
+ *
+ * Layout: the fixed prefix, the source (if it is up), every up child that
+ * is not a source, then the down children and the children whose state is
+ * unknown (child_up == -1), each group introduced by its own header.
+ */
+static size_t
+afr_sh_metadata_info_format (char *buf, size_t size, afr_local_t *local,
+                             afr_self_heal_t *sh, afr_private_t *priv)
+{
+        const char *name_fmt      = " %s, ";
+        size_t      off           = 0;
+        int         i             = 0;
+        int         down_count    = 0;
+        int         unknown_count = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i] == 0)
+                        down_count++;
+                else if (local->child_up[i] == -1)
+                        unknown_count++;
+        }
+
+        off = afr_sh_info_append (buf, size, off, "%s",
+                                  " metadata self heal");
+
+        for (i = 0; i < priv->child_count; i++) {
+                if ((sh->source == i) && (local->child_up[i] == 1))
+                        off = afr_sh_info_append (buf, size, off,
+                                                  " from source %s to",
+                                                  priv->children[i]->name);
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if ((local->child_up[i] == 1) && (sh->sources[i] == 0))
+                        off = afr_sh_info_append (buf, size, off, name_fmt,
+                                                  priv->children[i]->name);
+        }
+
+        if (down_count > 0) {
+                off = afr_sh_info_append (buf, size, off, "%s",
+                                          (down_count > 1) ?
+                                          " down subvolumes are " :
+                                          " down subvolume is ");
+                for (i = 0; i < priv->child_count; i++) {
+                        if (local->child_up[i] == 0)
+                                off = afr_sh_info_append (buf, size, off,
+                                                          name_fmt,
+                                                          priv->children[i]->name);
+                }
+        }
+
+        if (unknown_count > 0) {
+                off = afr_sh_info_append (buf, size, off, "%s",
+                                          (unknown_count > 1) ?
+                                          " unknown subvolumes are " :
+                                          " unknown subvolume is");
+                for (i = 0; i < priv->child_count; i++) {
+                        if (local->child_up[i] == -1)
+                                off = afr_sh_info_append (buf, size, off,
+                                                          name_fmt,
+                                                          priv->children[i]->name);
+                }
+        }
+
+        return off;
+}
+
+/*
+ * Build sh->metadata_sh_info: a description of which subvolume is the
+ * source, which are sinks, which are down/unknown, followed by the pending
+ * matrix rendered by afr_get_pending_matrix_str().
+ *
+ * The description is measured and built by the same formatter, so the
+ * buffer can never be smaller than what is written into it.  Both temporary
+ * strings are released on every path, including allocation failure; when
+ * the allocation fails sh->metadata_sh_info is left untouched.
+ */
+static void
+afr_set_metadata_sh_info_str (afr_local_t *local, afr_self_heal_t *sh,
+                              xlator_t *this)
+{
+        afr_private_t *priv               = NULL;
+        char          *pending_matrix_str = NULL;
+        char          *string             = NULL;
+        size_t         len                = 0;
+
+        priv = this->private;
+
+        /* May be NULL; an empty matrix string is substituted when printing. */
+        pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+                                                         this);
+
+        /* +1 for the terminating NUL. */
+        len = afr_sh_metadata_info_format (NULL, 0, local, sh, priv) + 1;
+
+        string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+        if (!string)
+                goto out;
+
+        afr_sh_metadata_info_format (string, len, local, sh, priv);
+
+        gf_asprintf (&sh->metadata_sh_info, "%s metadata %s,", string,
+                     pending_matrix_str ? pending_matrix_str : "");
+
+out:
+        /* Ownership is decided by the pointer, not by the string content. */
+        if (pending_matrix_str)
+                GF_FREE (pending_matrix_str);
+        if (string)
+                GF_FREE (string);
+}
int
afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
@@ -427,9 +547,7 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -437,38 +555,35 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_DEBUG,
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_metadata_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
gf_log (this->name, GF_LOG_TRACE,
"syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_set_metadata_sh_info_str (local, sh, this);
STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
priv->children[source],
priv->children[source]->fops->getxattr,
- &local->loc, NULL);
+ &local->loc, NULL, NULL);
return 0;
}
-int
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -481,27 +596,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_METADATA);
-
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
-
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
-
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -518,15 +622,21 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
}
if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW);
+ afr_sh_metadata_fail (frame, this);
+ goto out;
+ }
- local->govinda_gOvinda = 1;
+ afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
source = afr_sh_select_source (sh->sources, priv->child_count);
@@ -536,7 +646,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
"No active sources found.");
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
sh->source = source;
@@ -553,118 +663,26 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
sh->sources[i] = 0;
}
- afr_sh_metadata_sync_prepare (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->ia_type);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_fix (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
+ if ((!IA_ISREG (sh->buf[source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[source].ia_type))) {
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
}
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
+out:
+ return;
}
int
-afr_sh_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
+ xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -673,14 +691,21 @@ afr_sh_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking inodelks failed.");
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks failed for %s.", local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
+ "failed for %s.", local->loc.path);
afr_sh_metadata_done (frame, this);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non Blocking inodelks done. Proceeding to FOP");
- afr_sh_metadata_lookup (frame, this);
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks done for %s. Proceeding to FOP",
+ local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_metadata_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
}
return 0;
@@ -690,37 +715,52 @@ int
afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_post_nonblocking_inodelk_cbk;
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
+ int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
afr_nonblocking_inodelk (frame, this);
return 0;
}
+/*
+ * Decide whether metadata self-heal should be attempted: either a
+ * split-brain confirmation was forced on this self-heal, or metadata
+ * self-heal was requested for it and is enabled in the xlator options.
+ */
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+        gf_boolean_t wanted_and_enabled = _gf_false;
+
+        if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+                wanted_and_enabled = _gf_true;
+
+        return (sh->force_confirm_spb || wanted_and_enabled) ? _gf_true
+                                                              : _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = this->private;
-
+ afr_self_heal_t *sh = &local->self_heal;
local = frame->local;
+ sh = &local->self_heal;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
- if (local->self_heal.need_metadata_self_heal && priv->metadata_self_heal) {
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_sh_metadata_lock (frame, this);
} else {
afr_sh_metadata_done (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index b10ae3fc0..7c9bc8111 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_H__
@@ -30,13 +21,6 @@
#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this);
-
-int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
int
@@ -49,6 +33,11 @@ int
afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this);
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+int
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid);
#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 000000000..1b48a1bca
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,1787 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "afr.h"
+#include "syncop.h"
+#include "afr-self-heald.h"
+#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
+
+typedef enum { /* bit flags carried in the crawl_flags argument (tested with '&', set with '|=') */
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1 /* _crawl_proceed() refuses to continue when fewer than 2 children are up */
+} afr_crawl_flags_t;
+
+typedef enum { /* operation requested from _do_crawl_op_on_local_subvols() */
+ HEAL = 1, /* _do_crawl_op_on_local_subvols() adds STOP_CRAWL_ON_SINGLE_SUBVOL for this op */
+ INFO, /* NOTE(review): presumably "report entries that need heal" - confirm against callers */
+ STATISTICS_TO_BE_HEALED, /* NOTE(review): presumably the pending-heal counting crawl - confirm against callers */
+} shd_crawl_op;
+
+typedef struct shd_dump { /* context handed to the eh_dump() callbacks in this file */
+ dict_t *dict; /* output dictionary the callbacks add entries to */
+ xlator_t *this; /* xlator whose name keys the translator id in dict */
+ int child; /* child index; _add_event_to_dict() skips events of other children */
+} shd_dump_t;
+
+typedef struct shd_event_ { /* one entry saved into an event history by _crawl_post_sh_action() */
+ int child; /* index of the child the crawl was running on */
+ char *path; /* heap-allocated path; released with GF_FREE by the cleanup helpers */
+} shd_event_t;
+
+typedef struct shd_pos_ { /* arguments and result for a child-position query */
+ int child; /* child index being queried */
+ xlator_t *this; /* xlator the child belongs to */
+ afr_child_pos_t pos; /* filled in by afr_find_child_position() */
+} shd_pos_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/* For calling straight through (e.g. already in a synctask). */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos);
+
+/* For deferring through a new synctask. */
+int
+afr_syncop_find_child_position (void *data);
+
+/*
+ * Give @loc a path.  If the loc has an inode with a non-null gfid the path
+ * is resolved through inode_path() and its return value is passed back.
+ * Otherwise, if loc->gfid is set, a "<gfid:...>" string is duplicated into
+ * loc->path (0 on success).  Returns -1 when neither gfid is available or
+ * the duplication fails.
+ */
+static int
+_loc_assign_gfid_path (loc_t *loc)
+{
+        char    buf[64] = {0};
+
+        if (loc->inode && !uuid_is_null (loc->inode->gfid))
+                return inode_path (loc->inode, NULL, (char**)&loc->path);
+
+        if (uuid_is_null (loc->gfid))
+                return -1;
+
+        snprintf (buf, sizeof (buf), "<gfid:%s>", uuid_utoa (loc->gfid));
+        loc->path = gf_strdup (buf);
+
+        return loc->path ? 0 : -1;
+}
+
+/*
+ * Release the time strings held by a shd_crawl_event_t.  The event
+ * structure itself is not freed here.  A NULL @data is ignored.
+ */
+void
+_destroy_crawl_event_data (void *data)
+{
+        shd_crawl_event_t *ev = (shd_crawl_event_t *)data;
+
+        if (ev == NULL)
+                return;
+
+        GF_FREE (ev->start_time_str);
+        GF_FREE (ev->end_time_str);
+}
+
+/*
+ * Release the path held by a shd_event_t.  The event structure itself is
+ * not freed here.  A NULL @data is ignored.
+ */
+void
+_destroy_shd_event_data (void *data)
+{
+        shd_event_t *ev = (shd_event_t*)data;
+
+        if (ev == NULL)
+                return;
+
+        GF_FREE (ev->path);
+}
+/*
+ * Free a shd_event_t completely: first its path, then the structure.
+ * A NULL @event is ignored.
+ */
+void
+shd_cleanup_event (void *event)
+{
+        shd_event_t *ev = event;
+
+        if (ev == NULL)
+                return;
+
+        GF_FREE (ev->path);
+        GF_FREE (ev);
+}
+
+
+/*
+ * Return the index of the first child whose recorded position is
+ * AFR_POS_LOCAL, or -1 if none of the first @child_count children is local.
+ */
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+        int idx = 0;
+
+        for (idx = 0; idx < child_count; idx++) {
+                if (shd->pos[idx] == AFR_POS_LOCAL)
+                        return idx;
+        }
+
+        return -1;
+}
+
+/*
+ * Prepare @loc to address the entry @name under the directory described by
+ * @parent: parent gfid, a reference on the parent inode, @name as the
+ * basename and a literal empty path.  @this is not used.
+ *
+ * Returns 0 on success.  If the parent inode reference cannot be taken the
+ * loc is wiped and -1 is returned.
+ */
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+        uuid_copy (loc->pargfid, parent->inode->gfid);
+        loc->name   = name;
+        loc->path   = "";
+        loc->parent = inode_ref (parent->inode);
+
+        if (loc->parent)
+                return 0;
+
+        /* path points at a string literal: clear it before loc_wipe()
+         * (presumably so that loc_wipe() does not try to release it) */
+        loc->path = NULL;
+        loc_wipe (loc);
+
+        return -1;
+}
+
+/*
+ * Copy the statistics of one crawl event into @output.
+ *
+ * Every value is stored under a key of the form
+ * "statistics_<what>-<xl_id>-<child>-<n>", where xl_id is the translator id
+ * found in @output under this->name and n is the current value of
+ * "statistics-<xl_id>-<child>-count" (0 when that key is absent).  After
+ * all values are stored the counter is written back as n + 1.
+ *
+ * @tv is not used.  Returns -1 when the event has no start time, the
+ * failing dict call's return value on error, and 0 on success.
+ */
+int
+_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child,
+                          shd_crawl_event_t *shd_event, struct timeval *tv)
+{
+        char      key[256] = {0};
+        char     *end_time = NULL;
+        uint64_t  idx      = 0;
+        int       xl_id    = 0;
+        int       ret      = 0;
+
+        if (!shd_event->start_time_str)
+                return -1;
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+                return ret;
+        }
+
+        /* A missing counter simply leaves idx at 0. */
+        snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+        (void) dict_get_uint64 (output, key, &idx);
+
+        snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_uint64(output, key, shd_event->healed_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "healed_count to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_uint64 (output, key, shd_event->split_brain_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "split_brain_count to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_dynstr (output, key, gf_strdup (shd_event->crawl_type));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_type to output");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_uint64 (output, key, shd_event->heal_failed_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "healed_failed_count to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_dynstr (output, key,
+                               gf_strdup(shd_event->start_time_str));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_start_time to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        end_time = shd_event->end_time_str;
+        if (!end_time)
+                end_time = "Could not determine the end time";
+        ret = dict_set_dynstr (output, key, gf_strdup(end_time));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "crawl_end_time to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+                  xl_id, child, idx);
+        ret = dict_set_int32 (output, key,
+                              (shd_event->crawl_inprogress == _gf_true) ? 1
+                                                                        : 0);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+                        "inprogress to outout");
+                return ret;
+        }
+
+        snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child);
+        ret = dict_set_uint64 (output, key, idx + 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not increment the "
+                        "counter.");
+        }
+
+        return ret;
+}
+
+/*
+ * Append @path to the per-child list kept in @output.
+ *
+ * The entry is stored under "<xl_id>-<child>-<n>", where xl_id is the
+ * translator id found in @output under this->name and n is the current
+ * value of "<xl_id>-<child>-count" (0 when absent).  When @tv is given its
+ * tv_sec is stored under "<xl_id>-<child>-<n>-time".  Finally the counter
+ * is written back as n + 1.
+ *
+ * @dyn selects dict_set_dynstr() instead of dict_set_str() for the path.
+ * Returns 0 on success, otherwise the failing dict call's return value.
+ */
+int
+_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
+                   struct timeval *tv, gf_boolean_t dyn)
+{
+        char      key[256] = {0};
+        uint64_t  idx      = 0;
+        int       xl_id    = 0;
+        int       ret      = -1;
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+                return ret;
+        }
+
+        /* A missing counter simply leaves idx at 0. */
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        (void) dict_get_uint64 (output, key, &idx);
+
+        snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, idx);
+        ret = dyn ? dict_set_dynstr (output, key, path)
+                  : dict_set_str (output, key, path);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+                        path);
+                return ret;
+        }
+
+        if (tv) {
+                snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+                          child, idx);
+                ret = dict_set_uint32 (output, key, tv->tv_sec);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s: Could not set time", path);
+                        return ret;
+                }
+        }
+
+        snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+        ret = dict_set_uint64 (output, key, idx + 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+                return ret;
+        }
+
+        return 0;
+}
+
+/*
+ * Ask @readdir_xl for the GFID_TO_PATH_KEY xattr of @child and return a
+ * freshly duplicated copy of it in *@fpath.
+ *
+ * Returns 0 on success (the caller owns *@fpath).  On failure *@fpath is
+ * left untouched; if the getxattr failed with ENOENT and @missing is
+ * non-NULL, *@missing is set to _gf_true.
+ */
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+                         char **fpath, gf_boolean_t *missing)
+{
+        dict_t *rsp   = NULL;
+        char   *found = NULL;
+        char   *copy  = NULL;
+        int     ret   = -1;
+
+        ret = syncop_getxattr (readdir_xl, child, &rsp, GFID_TO_PATH_KEY);
+        if (ret < 0) {
+                if (missing && (errno == ENOENT))
+                        *missing = _gf_true;
+                goto out;
+        }
+
+        ret = dict_get_str (rsp, GFID_TO_PATH_KEY, &found);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
+                        "gfid %s", uuid_utoa (child->gfid));
+                goto out;
+        }
+
+        /* The dict keeps its own string; hand the caller a private copy. */
+        copy = gf_strdup (found);
+        if (!copy) {
+                ret = -1;
+                goto out;
+        }
+
+        *fpath = copy;
+        ret = 0;
+out:
+        if (rsp)
+                dict_unref (rsp);
+        return ret;
+}
+
+/*
+ * eh_dump() callback: add the path of one saved shd_event_t to the output
+ * dict described by @data (a shd_dump_t), together with the buffer entry's
+ * timestamp.  Events recorded for a different child are skipped and
+ * reported as success (0).
+ */
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+        shd_dump_t  *dump = data;
+        shd_event_t *ev   = cb->data;
+
+        if (ev->child != dump->child)
+                return 0;
+
+        return _add_path_to_dict (dump->this, dump->dict, dump->child,
+                                  ev->path, &cb->tv, _gf_false);
+}
+
+/*
+ * eh_dump() callback: add the statistics of one saved shd_crawl_event_t to
+ * the output dict described by @data (a shd_dump_t).  Returns whatever
+ * _add_crawl_stats_to_dict() returns.
+ */
+int
+_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data)
+{
+        shd_dump_t        *dump  = data;
+        shd_crawl_event_t *crawl = cb->data;
+
+        return _add_crawl_stats_to_dict (dump->this, dump->dict, dump->child,
+                                         crawl, &cb->tv);
+}
+
+/*
+ * Dump every event of history @eh that belongs to @child into @dict, using
+ * _add_event_to_dict() as the per-entry callback.  Always returns 0; the
+ * result of eh_dump() is not examined.
+ */
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+{
+        shd_dump_t ctx = {0};
+
+        ctx.child = child;
+        ctx.dict  = dict;
+        ctx.this  = this;
+
+        eh_dump (eh, &ctx, _add_event_to_dict);
+
+        return 0;
+}
+
+
+
+/*
+ * Dump the crawl statistics history kept for @child into @dict, using
+ * _add_crawl_event_statistics_to_dict() as the per-entry callback.  Always
+ * returns 0; the result of eh_dump() is not examined.
+ */
+int
+_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child)
+{
+        afr_private_t    *priv = this->private;
+        afr_self_heald_t *shd  = &priv->shd;
+        shd_dump_t        ctx  = {0};
+
+        ctx.child = child;
+        ctx.dict  = dict;
+        ctx.this  = this;
+
+        eh_dump (shd->statistics[child], &ctx,
+                 _add_crawl_event_statistics_to_dict);
+
+        return 0;
+}
+
+/*
+ * Unlink the index entry @fname found under @parent on @readdir_xl.
+ * An unlink failure other than ENOENT is logged; nothing is reported back
+ * to the caller.
+ */
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+                     loc_t *parent, char *fname)
+{
+        loc_t   stale = {0};
+        int     ret   = 0;
+
+        if (_build_index_loc (this, &stale, fname, parent))
+                return;
+
+        gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
+                "for %s on %s", stale.name, readdir_xl->name);
+
+        ret = syncop_unlink (readdir_xl, &stale);
+        if (ret && (errno != ENOENT)) {
+                gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index "
+                       "on %s - %s",stale.name, readdir_xl->name,
+                       strerror (errno));
+        }
+
+        /* _build_index_loc() set path to a string literal: clear it
+         * before wiping the loc */
+        stale.path = NULL;
+        loc_wipe (&stale);
+}
+
+/*
+ * Crawl callback: look up @childloc on the crawl's readdir xlator and add
+ * its link count, minus two, to the running total kept in the output dict
+ * under "<xl_id>-<child>-hardlinks" (the total starts at 0 when the key is
+ * absent).  @entry and @parentloc are not used.
+ *
+ * Returns 0 on success, otherwise the failing call's return value.
+ */
+int
+_count_hard_links_under_base_indices_dir (xlator_t *this,
+                                          afr_crawl_data_t *crawl_data,
+                                          gf_dirent_t *entry, loc_t *childloc,
+                                          loc_t *parentloc, struct iatt *iattr)
+{
+        struct iatt  postparent = {0};
+        dict_t      *output     = crawl_data->op_data;
+        char         key[256]   = {0};
+        uint64_t     total      = 0;
+        int          xl_id      = 0;
+        int          ret        = 0;
+
+        ret = syncop_lookup (crawl_data->readdir_xl, childloc, NULL, iattr,
+                             NULL, &postparent);
+        if (ret)
+                return ret;
+
+        ret = dict_get_int32 (output, this->name, &xl_id);
+        if (ret)
+                return ret;
+
+        snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id,
+                  crawl_data->child);
+        /* A missing key simply leaves the total at 0. */
+        (void) dict_get_uint64 (output, key, &total);
+
+        /* Discount the base entry under indices/base_indices and the
+         * entry under indices/xattrop. */
+        total = total + iattr->ia_nlink - 2;
+
+        return dict_set_uint64 (output, key, total);
+}
+
+/*
+ * Crawl callback: resolve the path of @childloc from its gfid and add it to
+ * the output dict of the crawl.  When the gfid no longer resolves because
+ * the file is missing, the stale index entry is removed from @parentloc
+ * instead.  @entry and @iattr are not used.
+ *
+ * Returns 0 when the path was added, non-zero otherwise (including the
+ * stale-index case and a null gfid).
+ */
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+                      gf_dirent_t *entry,
+                      loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+        xlator_t     *readdir_xl = NULL;
+        char         *resolved   = NULL;
+        char          gfid[64]   = {0};
+        gf_boolean_t  gone       = _gf_false;
+        int           ret        = -1;
+
+        if (uuid_is_null (childloc->gfid))
+                return -1;
+
+        readdir_xl = crawl_data->readdir_xl;
+
+        ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &resolved,
+                                       &gone);
+        if (ret == 0) {
+                /* dyn == _gf_true: the path is handed over as a dynstr */
+                ret = _add_path_to_dict (this, crawl_data->op_data,
+                                         crawl_data->child, resolved,
+                                         NULL, _gf_true);
+        } else if (gone) {
+                _remove_stale_index (this, readdir_xl, parentloc,
+                                     uuid_utoa_r (childloc->gfid, gfid));
+        }
+
+        if (ret && resolved)
+                GF_FREE (resolved);
+
+        return ret;
+}
+
+/*
+ * Record the outcome of a self-heal attempt on @child.
+ *
+ * For an INDEX crawl, a lookup that failed with ENOENT only removes the
+ * stale index entry; otherwise the real path is resolved from the gfid.
+ * For other crawls child->path is duplicated.
+ *
+ * The outcome is then classified and the per-child crawl counters updated:
+ *   - split-brain : op failed with EIO, or the inode is in split-brain
+ *   - heal failed : op failed, or the response carries "sh-failed"
+ *   - healed      : the response carries "actual-sh-done" == 1
+ * and an event holding the path is saved into the matching history.  When
+ * none applies nothing is saved.
+ *
+ * The path string is owned by this function until an event holding it has
+ * been saved successfully; on every other exit it is released here, and an
+ * event that could not be saved is released as well.
+ */
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+                       int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
+                       afr_crawl_data_t *crawl_data)
+{
+        int                  ret             = 0;
+        afr_private_t       *priv            = NULL;
+        afr_self_heald_t    *shd             = NULL;
+        eh_t                *eh              = NULL;
+        char                *path            = NULL;
+        char                 gfid_str[64]    = {0};
+        shd_event_t         *event           = NULL;
+        int32_t              sh_failed       = 0;
+        gf_boolean_t         split_brain     = 0;
+        int32_t              actual_sh_done  = 0;
+        shd_crawl_event_t  **shd_crawl_event = NULL;
+        gf_boolean_t         path_handed_off = _gf_false;
+
+        priv = this->private;
+        shd  = &priv->shd;
+        if (crawl_data->crawl == INDEX) {
+                if ((op_ret < 0) && (op_errno == ENOENT)) {
+                        _remove_stale_index (this, crawl_data->readdir_xl,
+                                             parent, uuid_utoa_r (child->gfid,
+                                                                  gfid_str));
+                        goto out;
+                }
+                ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+                                               child, &path, NULL);
+                if (ret)
+                        goto out;
+        } else {
+                path = gf_strdup (child->path);
+                if (!path)
+                        goto out;
+        }
+
+        /* Both keys are optional: a missing key leaves the default 0. */
+        if (xattr_rsp) {
+                ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+                ret = dict_get_int32 (xattr_rsp, "actual-sh-done",
+                                      &actual_sh_done);
+        }
+
+        shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events);
+
+        split_brain = afr_is_split_brain (this, child->inode);
+        if ((op_ret < 0 && op_errno == EIO) || split_brain) {
+                eh = shd->split_brain;
+                shd_crawl_event[crawl_data->child]->split_brain_count += 1;
+        } else if ((op_ret < 0) || sh_failed) {
+                eh = shd->heal_failed;
+                shd_crawl_event[crawl_data->child]->heal_failed_count += 1;
+        } else if (actual_sh_done == 1) {
+                eh = shd->healed;
+                shd_crawl_event[crawl_data->child]->healed_count += 1;
+        }
+
+        if (eh == NULL) {
+                /* Nothing to record: the path stays ours and is released
+                 * at out. */
+                gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
+                        path);
+                goto out;
+        }
+
+        event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+        if (!event)
+                goto out;
+        event->child = crawl_data->child;
+        event->path  = path;
+
+        ret = eh_save_history (eh, event);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
+                        "to event history, (%d, %s)", path, op_ret,
+                        strerror (op_errno));
+                /* The history did not take the event; the path it points
+                 * at is released at out. */
+                GF_FREE (event);
+                goto out;
+        }
+
+        /* The saved event now owns the path. */
+        path_handed_off = _gf_true;
+out:
+        if (path && !path_handed_off)
+                GF_FREE (path);
+        return;
+}
+
+/*
+ * Link loc->inode into the inode table using the attributes in @iattr and
+ * make @loc point at the linked inode, dropping the reference on the inode
+ * it held before.  Returns 0 on success and -1 (after logging) when
+ * inode_link() fails, in which case @loc is left unchanged.
+ */
+int
+_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+{
+        inode_t *linked = inode_link (loc->inode, NULL, NULL, iattr);
+
+        if (linked == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode link failed "
+                        "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
+                return -1;
+        }
+
+        inode_unref (loc->inode);
+        loc->inode = linked;
+
+        return 0;
+}
+
+/*
+ * Crawl callback: look up @child through @this with the
+ * "allow-sh-for-running-transaction" request key set, pass the result to
+ * _crawl_post_sh_action() and, if the lookup succeeded, link the inode
+ * into @child.  @entry is not used.
+ *
+ * Returns -1 with errno set to ENOMEM when the request dict cannot be
+ * created, otherwise the result of the lookup or of the inode link.
+ */
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+                  loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+        struct iatt  postparent = {0};
+        dict_t      *req        = NULL;
+        dict_t      *rsp        = NULL;
+        int          ret        = 0;
+
+        req = dict_new ();
+        if (!req) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        /* the result of setting the key is not examined */
+        ret = dict_set_int32 (req, "allow-sh-for-running-transaction", 1);
+
+        gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+        ret = syncop_lookup (this, child, req, iattr, &rsp, &postparent);
+        _crawl_post_sh_action (this, parent, child, ret, errno, rsp,
+                               crawl_data);
+        if (rsp)
+                dict_unref (rsp);
+
+        if (ret == 0)
+                ret = _link_inode_update_loc (this, child, iattr);
+
+        dict_unref(req);
+
+        return ret;
+}
+
+/*
+ * Crawl-completion callback: free the crawl data and destroy the stack the
+ * crawl ran on.  The crawl result @ret is ignored; always returns 0.
+ */
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+        (void) ret;
+
+        GF_FREE (data);
+        STACK_DESTROY (sync_frame->root);
+
+        return 0;
+}
+
+/*
+ * Start an exclusive crawl of type @crawl on @child that runs
+ * _self_heal_entry() on every entry, stops once fewer than two children
+ * are up (STOP_CRAWL_ON_SINGLE_SUBVOL) and finishes in afr_crawl_done().
+ */
+void
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+        const gf_boolean_t exclusive = _gf_true;
+        const int          flags     = STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+        afr_start_crawl (this, child, crawl, _self_heal_entry,
+                         NULL, exclusive, flags,
+                         afr_crawl_done);
+}
+
+/*
+ * Tell whether a crawl on @child may go on.  It may not when the self-heal
+ * daemon is disabled, when the child is down, or - if @crawl_flags carries
+ * STOP_CRAWL_ON_SINGLE_SUBVOL - when fewer than two children are up.
+ *
+ * If @reason is non-NULL it receives a static string naming the cause, or
+ * NULL when the crawl may proceed.
+ */
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+{
+        afr_private_t    *priv = this->private;
+        afr_self_heald_t *shd  = &priv->shd;
+        char             *why  = NULL;
+
+        if (!shd->enabled) {
+                why = "Self-heal daemon is not enabled";
+                gf_log (this->name, GF_LOG_DEBUG, "%s", why);
+        } else if (!priv->child_up[child]) {
+                gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
+                        "subvol went down", priv->children[child]->name);
+                why = "Brick is Not connected";
+        } else if ((crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) &&
+                   (afr_up_children_count (priv->child_up,
+                                           priv->child_count) < 2)) {
+                gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
+                        "< 2 children are up");
+                why = "< 2 bricks in replica are running";
+        }
+
+        if (reason)
+                *reason = why;
+
+        /* every refusal above sets a reason, so "no reason" means go on */
+        return why ? _gf_false : _gf_true;
+}
+
+/*
+ * Run crawl operation `op` on every local subvolume and, when `output` is
+ * given, record a per-brick status string under the key
+ * "<xl_id>-<child>-status".
+ *
+ * `output` may be NULL; when it is not, it must carry the translator id
+ * under this->name.  For op == HEAL the crawl is refused while fewer than
+ * two children are up (STOP_CRAWL_ON_SINGLE_SUBVOL).  A FULL crawl stops
+ * after the first local subvolume it was started on.
+ *
+ * Returns 0 if the operation was started on at least one local subvolume,
+ * -1 otherwise.
+ */
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+                               shd_crawl_op op, dict_t *output)
+{
+        afr_private_t *priv        = NULL;
+        char          *status      = NULL;
+        char          *subkey      = "status";
+        char           key[256]    = {0};
+        shd_pos_t      pos_data    = {0};
+        int            op_ret      = -1;
+        int            xl_id       = -1;
+        int            i           = 0;
+        int            ret         = 0;
+        int            crawl_flags = 0;
+        gf_boolean_t   proceed     = _gf_false;
+
+        priv = this->private;
+        if (op == HEAL)
+                crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+        if (output) {
+                ret = dict_get_int32 (output, this->name, &xl_id);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+                                "translator-id is not available");
+                        goto out;
+                }
+        }
+        pos_data.this = this;
+        for (i = 0; i < priv->child_count; i++) {
+                /* On refusal, `status` holds the reason string. */
+                proceed = _crawl_proceed (this, i, crawl_flags, &status);
+                if (proceed) {
+                        pos_data.child = i;
+                        /*
+                         * We're already in a synctask in this case, so we
+                         * don't need to defer through a second (and in fact
+                         * that can cause deadlock).  Just call straight
+                         * through instead.
+                         */
+                        ret = afr_find_child_position (pos_data.this,
+                                                       pos_data.child,
+                                                       &pos_data.pos);
+                        if (ret) {
+                                status = "Not able to find brick location";
+                        } else if (pos_data.pos == AFR_POS_REMOTE) {
+                                status = "brick is remote";
+                        } else {
+                                op_ret = 0;
+                                if (op == HEAL) {
+                                        status = "Started self-heal";
+                                        _do_self_heal_on_subvol (this, i,
+                                                                 crawl);
+                                } else if (output && (op == INFO)) {
+                                        status = "";
+                                        afr_start_crawl (this, i, INDEX,
+                                                         _add_summary_to_dict,
+                                                         output, _gf_false, 0,
+                                                         NULL);
+                                } else if (output &&
+                                           (op == STATISTICS_TO_BE_HEALED)) {
+                                        status = "";
+                                        afr_start_crawl (this, i,
+                                                         INDEX_TO_BE_HEALED,
+                                                         _count_hard_links_under_base_indices_dir,
+                                                         output, _gf_false,
+                                                         0, NULL);
+                                }
+                        }
+                }
+
+                /*
+                 * Single reporting point: the status used to be stored twice
+                 * under the same key whenever the crawl was allowed.
+                 */
+                if (output) {
+                        snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i,
+                                  subkey);
+                        ret = dict_set_str (output, key, status);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "failed to set %s in output dict",
+                                        key);
+                }
+
+                /* A full crawl only needs to run from one local brick. */
+                if (proceed && !op_ret && (crawl == FULL))
+                        break;
+        }
+out:
+        return op_ret;
+}
+
+/*
+ * Convenience wrapper: run the HEAL operation with crawl type `crawl` via
+ * _do_crawl_op_on_local_subvols() and return its result unchanged.
+ */
+int
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+                                dict_t *output)
+{
+        int op_ret = _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+
+        return op_ret;
+}
+
+/*
+ * Convenience wrapper: run the INFO operation as an INDEX crawl via
+ * _do_crawl_op_on_local_subvols() and return its result unchanged.
+ */
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+        int op_ret = _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+
+        return op_ret;
+}
+
+/*
+ * Call _add_statistics_to_dict() for every child whose recorded position is
+ * AFR_POS_LOCAL; all other children are skipped.
+ */
+void
+afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+        afr_private_t    *priv = this->private;
+        afr_self_heald_t *shd  = &priv->shd;
+        int               idx  = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (shd->pos[idx] == AFR_POS_LOCAL)
+                        _add_statistics_to_dict (this, dict, idx);
+        }
+}
+
+/*
+ * Return a crawl event to its empty state.  Only start_time_str is freed;
+ * end_time_str and crawl_type are merely cleared.
+ */
+static void
+reset_crawl_event (shd_crawl_event_t *crawl_event)
+{
+        /* Counters. */
+        crawl_event->healed_count      = 0;
+        crawl_event->split_brain_count = 0;
+        crawl_event->heal_failed_count = 0;
+
+        /* Strings. */
+        GF_FREE (crawl_event->start_time_str);
+        crawl_event->start_time_str = NULL;
+        crawl_event->end_time_str   = NULL;
+        crawl_event->crawl_type     = NULL;
+
+        crawl_event->crawl_inprogress = _gf_false;
+}
+
+/*
+ * Fill `dst` with a snapshot of the running crawl described by `src`.
+ *
+ * The counters and crawl_type are copied, start_time_str is duplicated with
+ * gf_strdup(), end_time_str is pointed at a fixed "in progress" message and
+ * crawl_inprogress is forced to true.
+ */
+static void
+afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst)
+{
+        /* Counters. */
+        dst->healed_count      = src->healed_count;
+        dst->split_brain_count = src->split_brain_count;
+        dst->heal_failed_count = src->heal_failed_count;
+
+        /* dst gets its own copy of the start time; the rest is shared. */
+        dst->start_time_str = gf_strdup (src->start_time_str);
+        dst->end_time_str   = "Crawl is already in progress";
+        dst->crawl_type     = src->crawl_type;
+
+        dst->crawl_inprogress = _gf_true;
+}
+
+/*
+ * For every local child that has an entry in shd->crawl_events, add a
+ * snapshot of that crawl to `dict` through _add_crawl_stats_to_dict().
+ * The walk over the children is done while holding priv->lock.
+ *
+ * Returns 0 on success, -1 if the scratch event cannot be allocated.
+ */
+static int
+afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict)
+{
+        afr_private_t     *priv     = this->private;
+        afr_self_heald_t  *shd      = &priv->shd;
+        shd_crawl_event_t *snapshot = NULL;
+        int                idx      = 0;
+
+        snapshot = GF_CALLOC (1, sizeof (shd_crawl_event_t),
+                              gf_afr_mt_shd_crawl_event_t);
+        if (!snapshot)
+                return -1;
+
+        LOCK (&priv->lock);
+        {
+                for (idx = 0; idx < priv->child_count; idx++) {
+                        if (shd->pos[idx] != AFR_POS_LOCAL)
+                                continue;
+
+                        /* Releases the string copied for the previous child. */
+                        reset_crawl_event (snapshot);
+
+                        if (shd->crawl_events[idx]) {
+                                afr_copy_crawl_event_struct (shd->crawl_events[idx],
+                                                             snapshot);
+                                _add_crawl_stats_to_dict (this, dict, idx,
+                                                          snapshot, NULL);
+                        }
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        reset_crawl_event (snapshot);
+        GF_FREE (snapshot);
+
+        return 0;
+}
+
+/*
+ * Add the statistics of completed crawls, then those of running crawls, to
+ * `dict`.  Returns the result of the running-crawl step.
+ */
+static int
+_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+        afr_fill_completed_crawl_statistics_to_dict (this, dict);
+
+        return afr_fill_crawl_statistics_of_running_crawl (this, dict);
+}
+/*
+ * Call _add_eh_to_dict() with event history `eh` for every child whose
+ * recorded position is AFR_POS_LOCAL.  Always returns 0.
+ */
+int
+_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+        afr_private_t    *priv = this->private;
+        afr_self_heald_t *shd  = &priv->shd;
+        int               idx  = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (shd->pos[idx] == AFR_POS_LOCAL)
+                        _add_eh_to_dict (this, eh, dict, idx);
+        }
+
+        return 0;
+}
+
+/*
+ * Dispatch the operation stored under "xl-op" in `input`.
+ *
+ * The translator id found in `input` under this->name is copied into
+ * `output` for the duration of the call and removed again before returning.
+ *
+ * Returns the result of the selected handler, or the dict error if a
+ * required key is missing.  GF_AFR_OP_INDEX_SUMMARY always yields 0.  An
+ * unknown op is logged and leaves the return value at 0.
+ */
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+        gf_xl_afr_op_t    op     = GF_AFR_OP_INVALID;
+        int32_t           op_val = GF_AFR_OP_INVALID;
+        int               ret    = 0;
+        afr_private_t    *priv   = NULL;
+        afr_self_heald_t *shd    = NULL;
+        int               xl_id  = 0;
+
+        priv = this->private;
+        shd = &priv->shd;
+
+        /*
+         * Read into a real int32_t and convert afterwards: writing through
+         * an (int32_t *) cast of the enum assumes the enum is exactly 32
+         * bits wide.
+         */
+        ret = dict_get_int32 (input, "xl-op", &op_val);
+        if (ret)
+                goto out;
+        op = (gf_xl_afr_op_t) op_val;
+
+        ret = dict_get_int32 (input, this->name, &xl_id);
+        if (ret)
+                goto out;
+        ret = dict_set_int32 (output, this->name, xl_id);
+        if (ret)
+                goto out;
+        switch (op) {
+        case GF_AFR_OP_HEAL_INDEX:
+                ret = _do_self_heal_on_local_subvols (this, INDEX, output);
+                break;
+        case GF_AFR_OP_HEAL_FULL:
+                ret = _do_self_heal_on_local_subvols (this, FULL, output);
+                break;
+        case GF_AFR_OP_INDEX_SUMMARY:
+                (void)_get_index_summary_on_local_subvols (this, output);
+                ret = 0;
+                break;
+        case GF_AFR_OP_HEALED_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->healed, output);
+                break;
+        case GF_AFR_OP_HEAL_FAILED_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed,
+                                                     output);
+                break;
+        case GF_AFR_OP_SPLIT_BRAIN_FILES:
+                ret = _add_local_subvols_eh_to_dict (this, shd->split_brain,
+                                                     output);
+                break;
+        case GF_AFR_OP_STATISTICS:
+                ret = _add_local_subvols_crawl_statistics_to_dict (this, output);
+                break;
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+                                                     STATISTICS_TO_BE_HEALED,
+                                                     output);
+                break;
+        default:
+                gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op_val);
+                break;
+        }
+out:
+        /* The id was only needed by the handlers above. */
+        dict_del (output, this->name);
+        return ret;
+}
+
+/*
+ * Timer callback for one child (its index is passed through `data`).
+ *
+ * Resolves the child's position if it is still unknown, starts an INDEX
+ * self-heal when the daemon is enabled and the child is local, and then
+ * re-arms itself after shd->timeout seconds unless the child is remote.
+ * The timer that was stored before is cancelled.
+ */
+void
+afr_poll_self_heal (void *data)
+{
+        xlator_t         *this      = THIS;
+        afr_private_t    *priv      = this->private;
+        afr_self_heald_t *shd       = &priv->shd;
+        long              child     = (long)data;
+        struct timespec   timeout   = {0};
+        gf_timer_t       *old_timer = NULL;
+        gf_timer_t       *new_timer = NULL;
+        shd_pos_t         pos_data  = {0};
+        int               ret       = 0;
+
+        if (shd->pos[child] == AFR_POS_UNKNOWN) {
+                pos_data.this = this;
+                pos_data.child = child;
+                /*
+                 * NOTE(review): pos_data lives on this stack and is read
+                 * right after the call, so this presumably relies on
+                 * synctask_new() completing the task before returning when
+                 * no callback is given - confirm.
+                 */
+                ret = synctask_new (this->ctx->env,
+                                    afr_syncop_find_child_position,
+                                    NULL, NULL, &pos_data);
+                if (!ret)
+                        shd->pos[child] = pos_data.pos;
+        }
+
+        if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
+                _do_self_heal_on_subvol (this, child, INDEX);
+
+        timeout.tv_sec = shd->timeout;
+        timeout.tv_nsec = 0;
+
+        //notify and previous timer should be synchronized.
+        LOCK (&priv->lock);
+        {
+                old_timer = shd->timer[child];
+                /* Remote children are not polled again. */
+                if (shd->pos[child] != AFR_POS_REMOTE) {
+                        new_timer = gf_timer_call_after (this->ctx, timeout,
+                                                         afr_poll_self_heal,
+                                                         data);
+                        shd->timer[child] = new_timer;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (old_timer)
+                gf_timer_call_cancel (this->ctx, old_timer);
+
+        if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Could not create self-heal polling timer for %s",
+                        priv->children[child]->name);
+        }
+}
+
+/*
+ * Callback given to synctask_new() by afr_proactive_self_heal().
+ *
+ * On success (ret == 0) the freshly computed position is stored, polling is
+ * started for non-remote children and an INDEX self-heal is triggered on the
+ * local subvolumes.  `data` is freed in every case.  Always returns 0.
+ */
+static int
+afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data)
+{
+        shd_pos_t     *pos_data = data;
+        afr_private_t *priv     = NULL;
+
+        if (ret == 0) {
+                priv = pos_data->this->private;
+                priv->shd.pos[pos_data->child] = pos_data->pos;
+
+                if (pos_data->pos != AFR_POS_REMOTE)
+                        afr_poll_self_heal ((void*)(long)pos_data->child);
+
+                _do_self_heal_on_local_subvols (THIS, INDEX, NULL);
+        }
+
+        GF_FREE (data);
+        return 0;
+}
+
+/*
+ * Re-compute the position of a child (its index is passed through `data`)
+ * in a synctask and let afr_handle_child_up() act on the result.
+ *
+ * Ownership: the allocated shd_pos_t is freed by afr_handle_child_up().  If
+ * the task cannot be created it is freed here instead.
+ */
+void
+afr_proactive_self_heal (void *data)
+{
+        xlator_t  *this     = NULL;
+        long       child    = (long)data;
+        shd_pos_t *pos_data = NULL;
+        int        ret      = 0;
+
+        this = THIS;
+
+        //Position of brick could have changed and it could be local now.
+        //Compute the position again
+        pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+        if (!pos_data)
+                goto out;
+        pos_data->this = this;
+        pos_data->child = child;
+        ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+                            afr_handle_child_up, NULL, pos_data);
+        if (ret) {
+                /*
+                 * NOTE(review): assumes synctask_new() does not invoke the
+                 * callback when it fails, so nobody else frees pos_data -
+                 * confirm against libglusterfs syncop.
+                 */
+                gf_log (this->name, GF_LOG_WARNING, "failed to create "
+                        "synctask to find position of child %ld", child);
+                GF_FREE (pos_data);
+        }
+out:
+        return;
+}
+
+/*
+ * Extract the text between the first and the last ':' of `pathinfo` into
+ * `hostname`, a buffer of `size` bytes.
+ *
+ * Returns 0 on success; the result is always NUL-terminated.  Returns -1
+ * and leaves `hostname` untouched when `pathinfo` or `hostname` is NULL,
+ * `size` is 0, `pathinfo` holds fewer than two ':' characters, or the
+ * extracted text plus its terminator does not fit in `size` bytes.
+ */
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+        char   *start = NULL;
+        char   *end   = NULL;
+        size_t  len   = 0;
+
+        if (!pathinfo || !hostname || size == 0)
+                return -1;
+
+        start = strchr (pathinfo, ':');
+        if (!start)
+                return -1;
+        end = strrchr (pathinfo, ':');
+        if (start == end)
+                return -1;
+
+        /* Number of characters strictly between the two colons. */
+        len = (size_t)(end - start) - 1;
+
+        /*
+         * Refuse rather than truncate: a clipped host name could compare
+         * equal to a different host.
+         */
+        if (len >= size)
+                return -1;
+
+        memset (hostname, 0, size);
+        memcpy (hostname, start + 1, len);
+
+        return 0;
+}
+
+/*
+ * Set *local to true when the host embedded in `pathinfo` equals the name
+ * returned by gethostname(), false otherwise.
+ *
+ * Returns 0 on success.  On failure the non-zero result of
+ * get_pathinfo_host() or gethostname() is returned and *local stays false.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+        xlator_t *this           = THIS;
+        char      brick_host[1024] = {0};
+        char      this_host[1024]  = {0};
+        int       ret            = 0;
+
+        *local = _gf_false;
+
+        ret = get_pathinfo_host (pathinfo, brick_host, sizeof (brick_host));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+                        pathinfo);
+                return ret;
+        }
+
+        ret = gethostname (this_host, sizeof (this_host));
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+                        "reason: %s", strerror (errno));
+                return ret;
+        }
+
+        if (strcmp (this_host, brick_host) == 0)
+                *local = _gf_true;
+
+        return ret;
+}
+
+/*
+ * Fill `dirloc` with the directory a crawl starts from.
+ *
+ *  - FULL:               the root loc built by afr_build_root_loc().
+ *  - INDEX:              the directory whose gfid is returned by the
+ *                        GF_XATTROP_INDEX_GFID getxattr on the root.
+ *  - INDEX_TO_BE_HEALED: the directory whose gfid is returned by the
+ *                        GF_BASE_INDICES_HOLDER_GFID getxattr on the root.
+ *
+ * For the two index crawls the directory is looked up on
+ * crawl_data->readdir_xl and its inode is linked into `dirloc`.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+                           loc_t *dirloc)
+{
+        afr_private_t *priv                      = NULL;
+        dict_t        *xattr                     = NULL;
+        void          *index_gfid                = NULL;
+        void          *base_indices_holder_vgfid = NULL;
+        loc_t          rootloc                   = {0};
+        struct iatt    iattr                     = {0};
+        struct iatt    parent                    = {0};
+        int            ret                       = 0;
+        xlator_t      *readdir_xl                = crawl_data->readdir_xl;
+
+        priv = this->private;
+        if (crawl_data->crawl == FULL) {
+                afr_build_root_loc (this, dirloc);
+        } else if (crawl_data->crawl == INDEX) {
+                afr_build_root_loc (this, &rootloc);
+                ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+                                       GF_XATTROP_INDEX_GFID);
+                if (ret < 0)
+                        goto out;
+                ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+                                "dir gfid on %s", readdir_xl->name);
+                        goto out;
+                }
+                if (!index_gfid) {
+                        gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+                                "on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                uuid_copy (dirloc->gfid, index_gfid);
+                dirloc->path = "";
+                dirloc->inode = inode_new (priv->root_inode->table);
+                /* Do not hand a NULL inode to the lookup below. */
+                if (!dirloc->inode) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to create "
+                                "inode for index dir on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                ret = syncop_lookup (readdir_xl, dirloc, NULL,
+                                     &iattr, NULL, &parent);
+                if (ret < 0) {
+                        if (errno != ENOENT) {
+                                gf_log (this->name, GF_LOG_ERROR, "lookup "
+                                        "failed on index dir on %s - (%s)",
+                                        readdir_xl->name, strerror (errno));
+                        }
+                        goto out;
+                }
+                ret = _link_inode_update_loc (this, dirloc, &iattr);
+                if (ret)
+                        goto out;
+        } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+                afr_build_root_loc (this, &rootloc);
+                ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+                                       GF_BASE_INDICES_HOLDER_GFID);
+                if (ret < 0)
+                        goto out;
+                ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID,
+                                    &base_indices_holder_vgfid);
+                if (ret < 0) {
+                        /* Was a copy of the "index gfid empty" message. */
+                        gf_log (this->name, GF_LOG_ERROR, "failed to get "
+                                "base indices holder gfid on %s",
+                                readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                if (!base_indices_holder_vgfid) {
+                        /* The two literals used to join without a space. */
+                        gf_log (this->name, GF_LOG_ERROR, "Base indices holder "
+                                "virtual gfid is null on %s", readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                uuid_copy (dirloc->gfid, base_indices_holder_vgfid);
+                dirloc->path = "";
+                dirloc->inode = inode_new (priv->root_inode->table);
+                /* Do not hand a NULL inode to the lookup below. */
+                if (!dirloc->inode) {
+                        gf_log (this->name, GF_LOG_ERROR, "failed to create "
+                                "inode for base_indices_holder dir on %s",
+                                readdir_xl->name);
+                        ret = -1;
+                        goto out;
+                }
+                ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL,
+                                     &parent);
+                if (ret < 0) {
+                        if (errno != ENOENT) {
+                                gf_log (this->name, GF_LOG_ERROR, "lookup "
+                                        "failed for base_indices_holder dir"
+                                        " on %s - (%s)", readdir_xl->name,
+                                        strerror (errno));
+
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR, "base_indices"
+                                        "_holder is not yet created.");
+                        }
+                        goto out;
+                }
+                ret = _link_inode_update_loc (this, dirloc, &iattr);
+                if (ret)
+                        goto out;
+        }
+        ret = 0;
+out:
+        if (xattr)
+                dict_unref (xattr);
+        loc_wipe (&rootloc);
+        return ret;
+}
+
+/*
+ * Obtain a directory fd for `dirloc`.
+ *
+ * A FULL crawl creates an fd and opens the directory on
+ * crawl_data->readdir_xl; every other crawl type uses an anonymous fd.
+ *
+ * Returns 0 and stores the fd in *dirfd on success.  On failure a negative
+ * value is returned, *dirfd is not written and any fd obtained here is
+ * released.
+ */
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+                   loc_t *dirloc)
+{
+        fd_t *fd          = NULL;
+        int   ret         = 0;
+        int   saved_errno = 0;
+
+        if (crawl_data->crawl == FULL) {
+                fd = fd_create (dirloc->inode, crawl_data->pid);
+                if (!fd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to create fd for %s", dirloc->path);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "opendir failed on %s", dirloc->path);
+                        goto out;
+                }
+        } else {
+                fd = fd_anonymous (dirloc->inode);
+                /* A NULL fd must not be reported as success. */
+                if (!fd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to get anonymous fd for %s",
+                                dirloc->path);
+                        ret = -1;
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        if (!ret) {
+                *dirfd = fd;
+        } else if (fd) {
+                /*
+                 * The fd created above was leaked when opendir failed.
+                 * Keep errno intact for callers that inspect it.
+                 */
+                saved_errno = errno;
+                fd_unref (fd);
+                errno = saved_errno;
+        }
+        return ret;
+}
+
+/*
+ * Pick the translator directory reads go to: `this` for a FULL crawl,
+ * otherwise the child translator selected by crawl_data->child.
+ */
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+{
+        afr_private_t *priv = this->private;
+
+        if (crawl_data->crawl != FULL)
+                return priv->children[crawl_data->child];
+
+        return this;
+}
+
+/*
+ * Build the loc of directory entry `entry` found under `parent`.
+ *
+ *  - FULL:               delegated to afr_build_child_loc().
+ *  - INDEX_TO_BE_HEALED: built by _build_index_loc(), then given a fresh
+ *                        inode and a NULL path.
+ *  - anything else:      a fresh inode, the gfid parsed from the entry name
+ *                        and a path from _loc_assign_gfid_path().
+ *
+ * Returns 0 on success and non-zero on failure (-1 when inode_new() fails).
+ * The result of uuid_parse() is not checked.
+ */
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+                           gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+        afr_private_t *priv   = this->private;
+        int            status = -1;
+
+        switch (crawl_data->crawl) {
+        case FULL:
+                return afr_build_child_loc (this, child, parent,
+                                            entry->d_name);
+
+        case INDEX_TO_BE_HEALED:
+                status = _build_index_loc (this, child, entry->d_name, parent);
+                if (status)
+                        return status;
+
+                child->inode = inode_new (priv->root_inode->table);
+                if (!child->inode)
+                        return -1;
+
+                child->path = NULL;
+                return status;
+
+        default:
+                child->inode = inode_new (priv->root_inode->table);
+                if (!child->inode)
+                        return -1;
+
+                uuid_parse (entry->d_name, child->gfid);
+                return _loc_assign_gfid_path (child);
+        }
+}
+
+static int
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ loc_t entry_loc = {0};
+ fd_t *fd = NULL;
+ struct iatt iattr = {0};
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ *offset = entry->d_off;
+ if (IS_ENTRY_CWD (entry->d_name) ||
+ IS_ENTRY_PARENT (entry->d_name))
+ continue;
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ parentloc->path, entry->d_name);
+ continue;
+ }
+
+ loc_wipe (&entry_loc);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
+ if (ret)
+ goto out;
+
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) {
+ goto out;
+ } else if (ret) {
+ continue;
+ }
+
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED))
+ continue;
+
+ if (!IA_ISDIR (iattr.ia_type))
+ continue;
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);