summaryrefslogtreecommitdiffstats
path: root/THANKS
diff options
context:
space:
mode:
authorShehjar Tikoo <shehjart@zresearch.com>2009-05-05 16:04:57 +0530
committerAnand V. Avati <avati@amp.gluster.com>2009-05-05 17:51:24 +0530
commite9800f8915257f8e3fc94dea8257e20a5561dfd7 (patch)
tree194477fe8841ebfc82d31252a555e823a3296ab1 /THANKS
parentfe94d6e4992d7b7f2bcee73dfd2b0af76e1f2611 (diff)
libglusterfsclient: Add 0-byte size/count guards
Signed-off-by: Anand V. Avati <avati@amp.gluster.com>
Diffstat (limited to 'THANKS')
0 files changed, 0 insertions, 0 deletions
lusterfs-afrv1.git/diff/xlators/bindings/python/src/testxlator.py?id2=0413d46873d082bbd3ee41aced3db01e444422d9'>xlators/bindings/python/src/testxlator.py56
-rw-r--r--xlators/cluster/afr/src/Makefile.am31
-rw-r--r--xlators/cluster/afr/src/afr-common.c4400
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c831
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h31
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2163
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h43
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c1977
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h39
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2284
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h68
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1712
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h33
-rw-r--r--xlators/cluster/afr/src/afr-open.c438
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c239
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c743
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h42
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2818
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h133
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1775
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c2600
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c834
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c457
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h181
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1506
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h95
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1869
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h54
-rw-r--r--xlators/cluster/afr/src/afr.c401
-rw-r--r--xlators/cluster/afr/src/afr.h999
-rw-r--r--xlators/cluster/afr/src/pump.c1047
-rw-r--r--xlators/cluster/afr/src/pump.h34
-rw-r--r--xlators/cluster/dht/src/Makefile.am13
-rw-r--r--xlators/cluster/dht/src/dht-common.c2146
-rw-r--r--xlators/cluster/dht/src/dht-common.h360
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c225
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c96
-rw-r--r--xlators/cluster/dht/src/dht-helper.c541
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c287
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c539
-rw-r--r--xlators/cluster/dht/src/dht-layout.c278
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c182
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h23
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1367
-rw-r--r--xlators/cluster/dht/src/dht-rename.c283
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c588
-rw-r--r--xlators/cluster/dht/src/dht-shared.c781
-rw-r--r--xlators/cluster/dht/src/dht.c497
-rw-r--r--xlators/cluster/dht/src/nufa.c395
-rw-r--r--xlators/cluster/dht/src/switch.c312
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c63
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c124
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am11
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c677
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h28
-rw-r--r--xlators/cluster/stripe/src/stripe.c3093
-rw-r--r--xlators/cluster/stripe/src/stripe.h163
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c996
-rw-r--r--xlators/debug/error-gen/src/error-gen.h37
-rw-r--r--xlators/debug/io-stats/src/Makefile.am9
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c918
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c3078
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h44
-rw-r--r--xlators/encryption/crypt/src/crypt.c4522
-rw-r--r--xlators/encryption/crypt/src/crypt.h903
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c71
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c618
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c516
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c197
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h48
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c696
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h405
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1568
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c361
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1287
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h134
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py775
-rw-r--r--xlators/features/glupy/examples/helloworld.py19
-rw-r--r--xlators/features/glupy/examples/negative.py91
-rw-r--r--xlators/features/glupy/src/Makefile.am21
-rw-r--r--xlators/features/glupy/src/glupy.c2471
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/glupy.py841
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1275
-rw-r--r--xlators/features/index/src/index.h59
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c423
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c212
-rw-r--r--xlators/features/locks/src/common.h70
-rw-r--r--xlators/features/locks/src/entrylk.c588
-rw-r--r--xlators/features/locks/src/inodelk.c487
-rw-r--r--xlators/features/locks/src/locks-mem-types.h21
-rw-r--r--xlators/features/locks/src/locks.h101
-rw-r--r--xlators/features/locks/src/posix.c1279
-rw-r--r--xlators/features/locks/src/reservelk.c51
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c52
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h24
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c79
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h27
-rw-r--r--xlators/features/marker/src/marker-quota.c544
-rw-r--r--xlators/features/marker/src/marker-quota.h73
-rw-r--r--xlators/features/marker/src/marker.c1157
-rw-r--r--xlators/features/marker/src/marker.h50
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c346
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c124
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h26
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__codecheck.py46
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py20
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py369
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py518
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py123
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py225
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py837
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py269
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c29
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c217
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c389
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c667
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c661
-rw-r--r--xlators/features/quiesce/src/quiesce.h23
-rw-r--r--xlators/features/quota/src/Makefile.am22
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c403
-rw-r--r--xlators/features/quota/src/quota-mem-types.h26
-rw-r--r--xlators/features/quota/src/quota.c3325
-rw-r--r--xlators/features/quota/src/quota.h181
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c423
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h37
-rw-r--r--xlators/features/quota/src/quotad-helpers.c113
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c210
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c133
-rw-r--r--xlators/features/read-only/src/read-only-common.h76
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c36
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h23
-rw-r--r--xlators/features/trash/src/trash.c109
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c536
-rw-r--r--xlators/lib/src/libxlator.h106
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1391
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c3930
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2791
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c939
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c559
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c213
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c118
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h30
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c122
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c4103
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h122
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c140
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c1526
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1078
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c1483
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1322
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c148
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h60
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2054
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h92
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1690
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c7050
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h405
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1787
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h110
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c1635
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1491
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c784
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h491
-rw-r--r--xlators/mount/fuse/src/Makefile.am22
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c3338
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h338
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c432
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h23
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c928
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in710
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in3
-rw-r--r--xlators/nfs/server/src/Makefile.am23
-rw-r--r--xlators/nfs/server/src/acl3.c843
-rw-r--r--xlators/nfs/server/src/acl3.h37
-rw-r--r--xlators/nfs/server/src/mount3.c1194
-rw-r--r--xlators/nfs/server/src/mount3.h39
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c211
-rw-r--r--xlators/nfs/server/src/nfs-common.h27
-rw-r--r--xlators/nfs/server/src/nfs-fops.c454
-rw-r--r--xlators/nfs/server/src/nfs-fops.h41
-rw-r--r--xlators/nfs/server/src/nfs-generics.c43
-rw-r--r--xlators/nfs/server/src/nfs-generics.h30
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c73
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h19
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h30
-rw-r--r--xlators/nfs/server/src/nfs.c1060
-rw-r--r--xlators/nfs/server/src/nfs.h52
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c183
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h55
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c2731
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h42
-rw-r--r--xlators/nfs/server/src/nfs3.c1032
-rw-r--r--xlators/nfs/server/src/nfs3.h130
-rw-r--r--xlators/nfs/server/src/nlm4.c2527
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c493
-rw-r--r--xlators/performance/io-cache/src/io-cache.h30
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c22
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h20
-rw-r--r--xlators/performance/io-cache/src/page.c91
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2060
-rw-r--r--xlators/performance/io-threads/src/io-threads.h33
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h21
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1020
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h22
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3559
-rw-r--r--xlators/performance/quick-read/src/quick-read.h61
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/page.c73
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h20
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c489
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h23
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h36
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c4259
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h106
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c56
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h23
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3744
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am11
-rw-r--r--xlators/protocol/auth/addr/src/addr.c37
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c22
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c25
-rw-r--r--xlators/protocol/client/src/client-handshake.c1017
-rw-r--r--xlators/protocol/client/src/client-helpers.c154
-rw-r--r--xlators/protocol/client/src/client-lk.c413
-rw-r--r--xlators/protocol/client/src/client-mem-types.h23
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c (renamed from xlators/protocol/client/src/client3_1-fops.c)3645
-rw-r--r--xlators/protocol/client/src/client.c817
-rw-r--r--xlators/protocol/client/src/client.h157
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am27
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am20
-rw-r--r--xlators/protocol/server/src/authenticate.c109
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c210
-rw-r--r--xlators/protocol/server/src/server-helpers.c1190
-rw-r--r--xlators/protocol/server/src/server-helpers.h55
-rw-r--r--xlators/protocol/server/src/server-mem-types.h20
-rw-r--r--xlators/protocol/server/src/server-resolve.c444
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6182
-rw-r--r--xlators/protocol/server/src/server.c673
-rw-r--r--xlators/protocol/server/src/server.h111
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5236
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c528
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c1021
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h27
-rw-r--r--xlators/storage/bd/src/bd.c2449
-rw-r--r--xlators/storage/bd/src/bd.h177
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c881
-rw-r--r--xlators/storage/posix/src/posix-handle.h228
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1068
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c3536
-rw-r--r--xlators/storage/posix/src/posix.h140
-rw-r--r--xlators/system/posix-acl/src/Makefile.am12
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h24
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c32
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h44
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c526
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h71
466 files changed, 131314 insertions, 100647 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26c..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f77665802..000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c6..000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb1310..000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c8165..000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c..000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index 3310a2115..000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c85..000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 16ed25af1..ea5a90abb 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,27 +1,38 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = $(afr_common_source) afr.c
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
+
+afr_la_LDFLAGS = -module -avoid-version
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
-pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LDFLAGS = -module -avoid-version
+pump_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h pump.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 108b021b3..6bd231600 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -54,780 +45,821 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heald.h"
-#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+call_frame_t *
+afr_copy_frame (call_frame_t *base)
{
- int i = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
+
+ frame = copy_frame (base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY (frame);
+ return NULL;
+ }
- for (i = 0; i < child_count; i++)
- dst[i] = src[i];
+ return frame;
}
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e priv->event_generation)
+ * when DATA and METADATA was last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
+ */
- priv = this->private;
+int
+__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
+
+ priv = this->private;
+
+ ret = __inode_ctx_get (inode, this, &val);
+ if (ret < 0)
+ return ret;
+
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
+ if (event_p)
+ *event_p = event;
+ return ret;
+}
+
+
+int
+__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
+{
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
+
+ return __inode_ctx_set (inode, this, &val);
}
+
int
-afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
- dict_t *xattr_req, loc_t *loc, void **gfid_req)
+__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
{
- int ret = -ENOMEM;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
- GF_ASSERT (gfid_req);
+ ret = __inode_ctx_get (inode, this, &val);
+ (void) ret;
- *gfid_req = NULL;
- local->xattr_req = dict_new ();
- if (!local->xattr_req)
- goto out;
- if (xattr_req)
- dict_copy (xattr_req, local->xattr_req);
+ metadatamap = (val & 0x000000000000ffff) >> 0;
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = 0;
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
- ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to get the gfid from dict", loc->path);
- *gfid_req = NULL;
- } else {
- if (loc->parent != NULL)
- dict_del (local->xattr_req, "gfid-req");
- }
- ret = 0;
-out:
- return ret;
+ return __inode_ctx_set (inode, this, &val);
}
-void
-afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small (inode, this, data,
+ metadata, event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
- inode = loc->inode;
- if (inode && !uuid_is_null (inode->gfid))
- uuid_copy (dst, inode->gfid);
- else if (!uuid_is_null (loc->gfid))
- uuid_copy (dst, loc->gfid);
- else if (new && !uuid_is_null (new))
- uuid_copy (dst, new);
+ return ret;
}
+
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno)
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- int i = 0;
- int errno_count = 0;
- int child = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (child_errno[child] == op_errno)
- errno_count++;
- }
- return errno_count;
-}
+ priv = this->private;
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small (inode, this, data,
+ metadata, event);
+ else
+ ret = -1;
- GF_ASSERT (gfid);
+ return ret;
+}
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
- }
- uuid_copy (*pgfid, gfid);
+int
+__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+ priv = this->private;
-out:
- if (ret && pgfid)
- GF_FREE (pgfid);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_reset_small (inode, this);
+ else
+ ret = -1;
- return ret;
+ return ret;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ int ret = -1;
- GF_ASSERT (child_count > 0);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get (inode, this, data,
+ metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
- }
- ret = 0;
-out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
- }
- return ctx;
+ return ret;
}
-void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- GF_ASSERT (inode);
- GF_ASSERT (params);
+ int ret = -1;
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- uint64_t ctx_addr = 0;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set (inode, this, data, metadata,
+ event);
+ }
+ UNLOCK(&inode->lock);
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_GET_READ_CTX:
- fresh_children = params->u.read_ctx.children;
- read_child = (int32_t)(ctx->masks &
- AFR_ICTX_READ_CHILD_MASK);
- params->u.read_ctx.read_child = read_child;
- if (!fresh_children)
- goto unlock;
- for (i = 0; i < priv->child_count; i++)
- fresh_children[i] = ctx->fresh_children[i];
- break;
- case AFR_INODE_GET_OPENDIR_DONE:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
- params->u.value = _gf_true;
- break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK)
- params->u.value = _gf_true;
- ;
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+ return ret;
}
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
- afr_inode_params_t params = {0};
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_reset (inode, this);
+ }
+ UNLOCK(&inode->lock);
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ return ret;
}
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
-{
- afr_inode_params_t params = {0};
- params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+int
+afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type (type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy (pending, pending_raw, sizeof(pending));
+
+ if (ntoh32 (pending[idx]))
+ accused[i] = 1;
+ }
+
+ return 0;
}
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+int
+afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
{
- afr_inode_params_t params = {0};
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
- params.op = AFR_INODE_GET_READ_CTX;
- params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.read_ctx.read_child;
-}
+ priv = this->private;
-void
-afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
- GF_ASSERT (read_child >= 0);
- remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
- mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = remaining_mask | mask;
+ return 0;
}
-void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
-{
- int i = 0;
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if (fresh_children)
- ctx->fresh_children[i] = fresh_children[i];
- else
- ctx->fresh_children[i] = -1;
- }
-}
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_accused = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
-void
-afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *stale_children, int32_t child_count)
-{
- int i = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
- GF_ASSERT (stale_children);
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if ((ctx->fresh_children[i] == -1) || (stale_children[i] == -1))
- break;
- afr_children_rm_child (ctx->fresh_children,
- stale_children[i], child_count);
- }
-}
+ if (replies[i].op_ret == -1) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
-void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ afr_accused_fill (this, replies[i].xdata, data_accused,
+ (inode->ia_type == IA_IFDIR) ?
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
- remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = remaining_mask | mask;
-}
+ afr_accused_fill (this, replies[i].xdata,
+ metadata_accused, AFR_METADATA_TRANSACTION);
-void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ }
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
+ if (inode->ia_type != IA_IFDIR)
+ afr_accuse_smallfiles (this, replies, data_accused);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+
+ afr_inode_read_subvol_set (inode, this, data_readable,
+ metadata_readable, event_generation);
+ return ret;
}
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
-{
- GF_ASSERT (inode);
- GF_ASSERT (params);
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- int32_t *stale_children = NULL;
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_SET_READ_CTX:
- read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.children;
- afr_inode_ctx_set_read_ctx (ctx, read_child,
- fresh_children,
- priv->child_count);
- break;
- case AFR_INODE_RM_STALE_CHILDREN:
- read_child = params->u.read_ctx.read_child;
- stale_children = params->u.read_ctx.children;
- afr_inode_ctx_rm_stale_children (ctx, read_child,
- stale_children,
- priv->child_count);
- break;
- case AFR_INODE_SET_OPENDIR_DONE:
- afr_inode_ctx_set_opendir_done (ctx);
- break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+int
+afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
+{
+ if (heal)
+ STACK_DESTROY (heal->root);
+ return 0;
}
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+int
+afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ err = afr_final_errno (local, priv);
+ret:
+ return -err;
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
+
+int
+afr_refresh_selfheal_wrap (void *opaque)
{
- afr_inode_params_t params = {0};
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int err = 0;
- params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
+ local = frame->local;
+ this = frame->this;
+
+ afr_selfheal (frame->this, local->refreshinode->gfid);
+
+ afr_selfheal_unlocked_discover (frame, local->refreshinode,
+ local->refreshinode->gfid,
+ local->replies);
+
+ afr_replies_interpret (frame, this, local->refreshinode);
+
+ err = afr_inode_refresh_err (frame, this);
+
+ afr_replies_wipe (local, this->private);
+
+ local->refreshfn (frame, this, err);
+
+ return 0;
}
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children)
+
+gf_boolean_t
+afr_selfheal_enabled (xlator_t *this)
{
- afr_inode_params_t params = {0};
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t data = _gf_false;
- priv = this->private;
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (fresh_children);
- GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
- read_child));
+ priv = this->private;
+
+ gf_string2boolean (priv->data_self_heal, &data);
- params.op = AFR_INODE_SET_READ_CTX;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ return data || priv->metadata_self_heal || priv->entry_self_heal;
}
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *stale_children)
+
+
+int
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int err = 0;
+
+ local = frame->local;
+
+ ret = afr_replies_interpret (frame, this, local->refreshinode);
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (stale_children);
+ err = afr_inode_refresh_err (frame, this);
- params.op = AFR_INODE_RM_STALE_CHILDREN;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+ afr_replies_wipe (local, this->private);
+
+ if (ret && afr_selfheal_enabled (this)) {
+ heal = copy_frame (frame);
+ if (heal)
+ heal->root->pid = -1;
+ ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto refresh_done;
+ } else {
+ refresh_done:
+ local->refreshfn (frame, this, err);
+ }
+
+ return 0;
}
-gf_boolean_t
-afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
-{
- gf_boolean_t source_xattrs = _gf_false;
- GF_ASSERT (child < child_count);
+int
+afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *par)
+{
+ afr_local_t *local = NULL;
+ int call_child = (long) cookie;
+ int call_count = 0;
+
+ local = frame->local;
+
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ local->replies[call_child].postparent = *par;
+ local->replies[call_child].xdata = dict_ref (xdata);
+ }
+
+ call_count = afr_frame_return (frame);
- if ((child >= 0) && (child < child_count) &&
- sources[child]) {
- source_xattrs = _gf_true;
- }
- return source_xattrs;
+ if (call_count == 0)
+ afr_inode_refresh_done (frame, this);
+
+ return 0;
}
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child)
+
+int
+afr_inode_refresh_subvol (call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, dict_t *xdata)
{
- gf_boolean_t success_child = _gf_false;
- int i = 0;
+ loc_t loc = {0, };
+ afr_private_t *priv = NULL;
- GF_ASSERT (child < child_count);
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (child == success_children[i]) {
- success_child = _gf_true;
- break;
- }
- }
- return success_child;
+ loc.inode = inode;
+ uuid_copy (loc.gfid, inode->gfid);
+
+ STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
}
-gf_boolean_t
-afr_is_read_child (int32_t *success_children, int32_t *sources,
- int32_t child_count, int32_t child)
+
+int
+afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
{
- gf_boolean_t success_child = _gf_false;
- gf_boolean_t source = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t *xdata = NULL;
- GF_ASSERT (success_children);
- GF_ASSERT (child_count > 0);
+ priv = this->private;
+ local = frame->local;
- success_child = afr_is_child_present (success_children, child_count,
- child);
- if (!success_child)
- goto out;
- if (NULL == sources) {
- source = _gf_true;
- goto out;
- }
- source = afr_is_source_child (sources, child_count, child);
-out:
- return (success_child && source);
+ afr_replies_wipe (local, priv);
+
+ xdata = dict_new ();
+ if (!xdata) {
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+
+ if (afr_xattr_req_prepare (this, xdata) != 0) {
+ dict_unref (xdata);
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
+
+ local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ afr_inode_refresh_subvol (frame, this, i, local->refreshinode,
+ xdata);
+
+ if (!--call_count)
+ break;
+ }
+
+ dict_unref (xdata);
+
+ return 0;
}
-/* If sources is NULL the xattrs are assumed to be of source for all
- * success_children.
- */
+
int
-afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources)
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_inode_refresh_cbk_t refreshfn)
{
- int32_t read_child = -1;
- int i = 0;
+ afr_local_t *local = NULL;
- GF_ASSERT (success_children);
+ local = frame->local;
- read_child = prev_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ local->refreshfn = refreshfn;
- read_child = config_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ if (local->refreshinode) {
+ inode_unref (local->refreshinode);
+ local->refreshinode = NULL;
+ }
- for (i = 0; i < child_count; i++) {
- read_child = success_children[i];
- if (read_child < 0)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
+ local->refreshinode = inode_ref (inode);
-out:
- return read_child;
+ afr_inode_refresh_do (frame, this);
+
+ return 0;
}
-/* This function should be used when all the success_children are sources
- */
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child)
-{
- int read_child = -1;
- afr_private_t *priv = NULL;
- priv = this->private;
- read_child = afr_select_read_child_from_policy (fresh_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- NULL);
- if (read_child >= 0)
- afr_inode_set_read_ctx (this, inode, read_child,
- fresh_children);
-}
-
-/* afr_next_call_child ()
- * This is a common function used by all the read-type fops
- * This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child)
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
{
- int next_index = 0;
- int32_t next_call_child = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- GF_ASSERT (last_index);
+ priv = this->private;
- next_index = *last_index;
-retry:
- next_index++;
- if ((next_index >= child_count) ||
- (fresh_children[next_index] == -1))
- goto out;
- if ((fresh_children[next_index] == read_child) ||
- (!child_up[fresh_children[next_index]]))
- goto retry;
- *last_index = next_index;
- next_call_child = fresh_children[next_index];
-out:
- return next_call_child;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value for %s",
+ priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64 (xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to set dirty "
+ "query flag");
+ }
+
+ return ret;
}
- /* This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index)
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
-
- GF_ASSERT (child_up);
- GF_ASSERT (call_child);
- GF_ASSERT (last_index);
- GF_ASSERT (fresh_children);
- GF_ASSERT (read_child >= 0);
+ int ret = -ENOMEM;
- priv = this->private;
- *call_child = -1;
- *last_index = -1;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
- if (child_up[read_child]) {
- *call_child = read_child;
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (fresh_children[i] == -1)
- break;
- if (child_up[fresh_children[i]]) {
- *call_child = fresh_children[i];
- ret = 0;
- break;
- }
- }
+ ret = afr_xattr_req_prepare (this, local->xattr_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to prepare xattr_req", loc->path);
+ }
- if (*call_child == -1) {
- ret = -ENOTCONN;
- goto out;
- }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
- *last_index = i;
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
}
+
+ ret = 0;
out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
- "last_index: %d", ret, *call_child, *last_index);
return ret;
}
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+
+int
+afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
{
- unsigned int i = 0;
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
- if (!xattr)
- goto out;
- for (i = 0; i < child_count; i++) {
- if (xattr[i]) {
- dict_unref (xattr[i]);
- xattr[i] = NULL;
- }
+ if (!hashmode) {
+ return -1;
}
-out:
- return;
-}
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ if (inode) {
+ uuid_copy (gfid_copy, inode->gfid);
+ }
+ if (hashmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients won't converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
- sh = &local->self_heal;
- priv = this->private;
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
- if (sh->buf)
- GF_FREE (sh->buf);
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable)
+{
+ afr_private_t *priv = NULL;
+ int read_subvol = -1;
+ int i = 0;
- if (sh->inode)
- inode_unref (sh->inode);
+ priv = this->private;
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child (inode, priv->child_count,
+ priv->hash_mode);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
+ /* no readable subvolumes, either split brain or all subvols down */
- if (sh->sources)
- GF_FREE (sh->sources);
+ return -1;
+}
- if (sh->success)
- GF_FREE (sh->success);
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type)
+{
+ int ret = -1;
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get (inode, this, 0, readable,
+ event_p);
+ else
+ ret = afr_inode_read_subvol_get (inode, this, readable, 0,
+ event_p);
+ return ret;
+}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
- if (sh->success_children)
- GF_FREE (sh->success_children);
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ int *event_p, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
+
+ priv = this->private;
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ readable = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+ intersection = alloca0 (priv->child_count);
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
- loc_wipe (&sh->parent_loc);
- loc_wipe (&sh->lookup_loc);
+ afr_inode_read_subvol_get (inode, this, data_readable, metadata_readable,
+ &event);
- if (sh->checksum)
- GF_FREE (sh->checksum);
+ AFR_INTERSECT (intersection, data_readable, metadata_readable,
+ priv->child_count);
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
+ if (AFR_COUNT (intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy (inode, this,
+ intersection);
+ else
+ subvol = afr_read_subvol_select_by_policy (inode, this,
+ readable);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ return subvol;
}
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
-
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ afr_matrix_cleanup (local->pending, priv->child_count);
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
GF_FREE (local->transaction.eager_lock);
GF_FREE (local->transaction.basename);
@@ -835,10 +867,39 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
}
void
+afr_replies_wipe (afr_local_t *local, afr_private_t *priv)
+{
+ int i;
+
+ if (!local->replies)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].xdata) {
+ dict_unref (local->replies[i].xdata);
+ local->replies[i].xdata = NULL;
+ }
+ }
+
+ memset (local->replies, 0, sizeof(*local->replies) * priv->child_count);
+}
+
+void
+afr_remove_eager_lock_stub (afr_local_t *local)
+{
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+}
+
+void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
{
afr_private_t * priv = NULL;
@@ -846,7 +907,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (!local)
return;
- afr_local_sh_cleanup (local, this);
+ syncbarrier_destroy (&local->barrier);
+
+ if (local->transaction.eager_lock_on &&
+ !list_empty (&local->transaction.eager_locked))
+ afr_remove_eager_lock_stub (local);
afr_local_transaction_cleanup (local, this);
@@ -861,52 +926,36 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->xattr_req)
dict_unref (local->xattr_req);
- if (local->child_up)
- GF_FREE (local->child_up);
+ if (local->dict)
+ dict_unref (local->dict);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ afr_replies_wipe (local, priv);
+ GF_FREE(local->replies);
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ GF_FREE (local->child_up);
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- afr_reset_xattr (local->cont.lookup.xattrs,
- priv->child_count);
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+ GF_FREE (local->read_attempted);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
+ GF_FREE (local->readable);
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ if (local->parent)
+ inode_unref (local->parent);
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ if (local->parent2)
+ inode_unref (local->parent2);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
-
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
- }
+ if (local->refreshinode)
+ inode_unref (local->refreshinode);
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -940,18 +989,40 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
dict_unref (local->cont.setxattr.dict);
}
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
{ /* removexattr */
GF_FREE (local->cont.removexattr.name);
}
-
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
{ /* symlink */
GF_FREE (local->cont.symlink.linkpath);
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
}
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
+ }
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
}
@@ -972,1377 +1043,1061 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-int
-afr_set_elem_count_get (unsigned char *elems, int child_count)
-{
- int i = 0;
- int ret = 0;
- for (i = 0; i < child_count; i++)
- if (elems[i])
- ret++;
- return ret;
-}
+gf_boolean_t
+afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
-/**
- * up_children_count - return the number of children that are up
- */
+ priv = this->private;
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count)
-{
- return afr_set_elem_count_get (child_up, child_count);
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32 (local->replies[i].xdata,
+ GLUSTERFS_PARENT_ENTRYLK,
+ &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count)
-{
- return afr_set_elem_count_get (children, child_count);
+ return _gf_false;
}
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count)
-{
- return afr_set_elem_count_get (pre_op, child_count);
-}
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
-{
- uint64_t ctx = 0;
- int32_t ret = 0;
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
- GF_ASSERT (loc);
- GF_ASSERT (this);
- GF_ASSERT (loc->inode);
+static void
+afr_handle_quota_size (call_frame_t *frame, xlator_t *this)
+{
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ uint64_t size = 0;
+ uint64_t max_size = 0;
+ int readable_cnt = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT (readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ if (dict_get_uint64 (replies[i].xdata, QUOTA_SIZE_KEY, &size))
+ continue;
+ if (size > max_size)
+ max_size = size;
+ }
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (0 == ret)
- return _gf_false;
- return _gf_true;
+ if (!max_size)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ if (dict_set_uint64 (replies[i].xdata, QUOTA_SIZE_KEY, max_size))
+ continue;
+ }
}
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ unsigned char *readable = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {0, };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
- uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- uuid_copy (loc->pargfid, postparent->ia_gfid);
-}
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
-int
-afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
-{
- int32_t read_child = -1;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- dict_t **xattr = NULL;
- int ret = 0;
+ locked_entry = afr_is_entry_possibly_under_txn (local, this);
- GF_ASSERT (local);
+ readable = alloca0 (priv->child_count);
- buf = &local->cont.lookup.buf;
- postparent = &local->cont.lookup.postparent;
- xattr = &local->cont.lookup.xattr;
+ afr_inode_read_subvol_get (local->loc.parent, this, readable,
+ NULL, &event);
- read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- NULL);
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
- read_child);
- if (!*xattr)
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
- *buf = local->cont.lookup.bufs[read_child];
- *postparent = local->cont.lookup.postparents[read_child];
+ /* First, check if we have an ESTALE from somewhere,
+ If so, propagate that so that a revalidate can be
+ issued
+ */
+ op_errno = afr_final_errno (frame->local, this->private);
+ local->op_errno = op_errno;
+ if (op_errno == ESTALE) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ goto unwind;
+ }
- if (IA_INVAL == local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type = buf->ia_type;
- }
-out:
- return ret;
-}
+ read_subvol = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (locked_entry && replies[i].op_ret == -1 &&
+ replies[i].op_errno == ENOENT) {
+ /* Second, check entry is still
+ "underway" in creation */
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ read_subvol = i;
+ goto unwind;
+ }
-static void
-afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = -1;
+ if (replies[i].op_ret == -1)
+ continue;
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
- GF_ASSERT (child_index >= 0);
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
+ local->op_ret = 0;
+ }
+ }
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
+ if (read_subvol == -1)
+ goto unwind;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ if (priv->child_up[i])
+ can_interpret = _gf_false;
+ continue;
+ }
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
-}
+ if (!uuid_compare (replies[i].poststat.ia_gfid,
+ read_gfid))
+ continue;
-static void
-afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
+ can_interpret = _gf_false;
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ if (locked_entry)
+ continue;
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
- }
-}
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto unwind;
+ }
-static void
-afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
- struct iatt *buf, struct iatt *lookup_buf)
-{
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
- }
+ /* Forth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ if (can_interpret) {
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ if (afr_replies_interpret (frame, this, local->inode)) {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ afr_inode_read_subvol_reset (local->inode, this);
+ goto cant_interpret;
+ } else {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ }
+ } else {
+ cant_interpret:
+ if (read_subvol == -1)
+ dict_del (replies[0].xdata, GF_CONTENT_KEY);
+ else
+ dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
- "ownership differs for %s ", local->loc.path);
- }
+ afr_handle_quota_size (frame, this);
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
- "size differs for %s ", local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
- if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
- /* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume", local->loc.path);
- }
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
-
- if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_entry_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "entries are missing in lookup of %s.",
- local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
- }
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ESTALE > ENOENT > others
+ */
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
+int
+afr_higher_errno (int32_t old_errno, int32_t new_errno)
+{
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
-out:
- return;
+ return new_errno;
}
-gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
-{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
+int
+afr_final_errno (afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno (op_errno, tmp_errno);
+ }
+
+ return op_errno;
}
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type)
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
- GF_ASSERT (ia_type != IA_INVAL);
+ if (!pathinfo)
+ goto out;
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
- return type;
+ start = strchr (pathinfo, ':');
+ if (!start)
+ goto out;
+ end = strrchr (pathinfo, ':');
+ if (start == end)
+ goto out;
+
+ memset (hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
+ ret = 0;
+out:
+ return ret;
}
int
-afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
- int32_t *read_child)
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
{
- ia_type_t ia_type = IA_INVAL;
- int32_t source = -1;
- int ret = -1;
- dict_t **xattrs = NULL;
- int32_t *success_children = NULL;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+ xlator_t *this = THIS;
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (local->success_count > 0);
+ *local = _gf_false;
+ ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+ pathinfo);
+ goto out;
+ }
- success_children = local->cont.lookup.success_children;
- /*We can take the success_children[0] only because we already
- *handle the conflicting children other wise, we could select the
- *read_child based on wrong file type
- */
- ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- type = afr_transaction_type_get (ia_type);
- xattrs = local->cont.lookup.xattrs;
- source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type);
- if (source < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
- "for %s", local->loc.path);
+ ret = gethostname (localhost, sizeof (localhost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+ "reason: %s", strerror (errno));
goto out;
}
- gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
- source, local->loc.path);
- *read_child = source;
- ret = 0;
+ if (!strcmp (localhost, pathinfohost))
+ *local = _gf_true;
out:
return ret;
}
-static inline gf_boolean_t
-afr_is_transaction_running (afr_local_t *local)
+static int32_t
+afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- GF_ASSERT (local->fop == GF_FOP_LOOKUP);
- return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
-}
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
-void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno))
-{
- afr_local_t *local = NULL;
- char sh_type_str[256] = {0,};
- char *bg = "";
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (inode);
- GF_ASSERT (ia_type != IA_INVAL);
+ if (op_ret != 0) {
+ goto out;
+ }
- local = frame->local;
- local->self_heal.background = background;
- local->self_heal.type = ia_type;
- local->self_heal.unwind = unwind;
- local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
- afr_self_heal_type_str_get (&local->self_heal,
- sh_type_str,
- sizeof (sh_type_str));
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
- if (background)
- bg = "background";
- gf_log (this->name, GF_LOG_INFO,
- "%s %s self-heal triggered. path: %s, reason: %s", bg,
- sh_type_str, local->loc.path, reason);
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
- afr_self_heal (frame, this, inode);
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
}
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
- struct iatt *bufs, unsigned int child_count,
- const char *path)
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
{
- unsigned int gfid_miss_count = 0;
- int i = 0;
- struct iatt *child1 = NULL;
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if (uuid_is_null (child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
- " on subvolume %d", path, success_children[i]);
- gfid_miss_count++;
- }
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
}
- return gfid_miss_count;
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
}
-static int
-afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+
+int
+afr_lookup_selfheal_wrap (void *opaque)
{
- int32_t *success_children = NULL;
- afr_private_t *priv = NULL;
- struct iatt *bufs = NULL;
- int miss_count = 0;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
+ local = frame->local;
+ this = frame->this;
- miss_count = afr_gfid_missing_count (this->name, success_children,
- bufs, priv->child_count,
- local->loc.path);
- return miss_count;
-}
+ afr_selfheal_name (frame->this, local->loc.pargfid, local->loc.name);
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name)
-{
- gf_boolean_t conflicting = _gf_false;
- int i = 0;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- uuid_t *gfid = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
- gfid = &child1->ia_gfid;
-
- if (i == 0)
- continue;
-
- child2 = &bufs[success_children[i-1]];
- if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- if (!gfid || uuid_is_null (child1->ia_gfid))
- continue;
- if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
- " on subvolume %d", path, success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- }
-out:
- return conflicting;
-}
+ afr_replies_wipe (local, this->private);
-/* afr_update_gfid_from_iatts: This function should be called only if the
- * iatts are not conflicting.
- */
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children, unsigned int child_count)
-{
- uuid_t *gfid = NULL;
- int i = 0;
- int child = 0;
+ inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up);
+ if (inode)
+ inode_unref (inode);
+ afr_lookup_done (frame, this);
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
- gfid = &bufs[child].ia_gfid;
- } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
- if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
- GF_ASSERT (0);
- goto out;
- }
- }
- }
- if (gfid && (!uuid_is_null (*gfid)))
- uuid_copy (uuid, *gfid);
-out:
- return;
+ return 0;
}
-static gf_boolean_t
-afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- gf_boolean_t conflict = _gf_false;
- priv = this->private;
- conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name);
- return conflict;
-}
+int
+afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t need_heal = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (first == -1) {
+ first = i;
+ continue;
+ }
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal)
-{
- return !strcmp (data_self_heal, "open");
-}
+ if (replies[i].op_ret != replies[first].op_ret) {
+ need_heal = _gf_true;
+ break;
+ }
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal)
-{
- gf_boolean_t enabled = _gf_false;
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first].poststat.ia_gfid)) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
- if (gf_string2boolean (data_self_heal, &enabled) == -1) {
- enabled = !strcmp (data_self_heal, "open");
- GF_ASSERT (enabled);
- }
+ if (need_heal) {
+ heal = copy_frame (frame);
+ if (heal)
+ heal->root->pid = -1;
+ ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto lookup_done;
+ } else {
+ lookup_done:
+ afr_lookup_done (frame, this);
+ }
- return enabled;
+ return ret;
}
-static void
-afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- struct iatt *bufs = NULL;
- dict_t **xattr = NULL;
- afr_private_t *priv = NULL;
- int32_t child1 = -1;
- int32_t child2 = -1;
- afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
- priv = this->private;
- sh = &local->self_heal;
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ child_index = (long) cookie;
- if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.do_gfid_self_heal = _gf_true;
+ local = frame->local;
- if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- else
- afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
- local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count);
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- bufs = local->cont.lookup.bufs;
- for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i-1];
- child2 = local->cont.lookup.success_children[i];
- afr_detect_self_heal_by_iatt (local, this,
- &bufs[child1], &bufs[child2]);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_entry_heal (frame, this);
}
- xattr = local->cont.lookup.xattrs;
- for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_params_by_xattr (local, this,
- xattr[child1]);
- }
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
- sh->do_data_self_heal = _gf_false;
+ return 0;
}
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+
+
+static void
+afr_discover_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ priv = this->private;
local = frame->local;
- if (op_ret == -1) {
- local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- goto out;
- } else {
- local->op_ret = 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
+ }
- afr_lookup_done_success_action (frame, this, _gf_true);
-out:
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ op_errno = afr_final_errno (frame->local, this->private);
- return 0;
-}
+ if (local->op_ret < 0) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ goto unwind;
+ }
-//TODO: At the moment only lookup needs this, so not doing any checks, in the
-// future we will have to do fop specific operations
-void
-afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_local_t *sh_local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- struct iatt *lookup_bufs = NULL;
- struct iatt *lookup_parentbufs = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- local = sh->orig_frame->local;
- lookup_bufs = local->cont.lookup.bufs;
- lookup_parentbufs = local->cont.lookup.postparents;
- priv = this->private;
+ afr_replies_interpret (frame, this, local->inode);
- memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
- memcpy (lookup_parentbufs, sh->parentbufs,
- priv->child_count * sizeof (*sh->parentbufs));
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+ if (read_subvol == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
+ local->loc.path);
- afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- local->cont.lookup.xattr = NULL;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid ||
+ local->replies[i].op_ret == -1)
+ continue;
+ read_subvol = i;
+ break;
+ }
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
- }
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
- afr_reset_children (local->cont.lookup.success_children,
- priv->child_count);
- afr_children_copy (local->cont.lookup.success_children,
- sh->fresh_children, priv->child_count);
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-static void
-afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
-{
- unsigned int up_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- char *reason = NULL;
-
- GF_ASSERT (sh_launched);
- *sh_launched = _gf_false;
- priv = this->private;
- local = frame->local;
- up_count = afr_up_children_count (local->child_up, priv->child_count);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
- goto out;
- }
+int
+afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
- goto out;
+ child_index = (long) cookie;
- reason = "lookup detected pending operations";
- afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- _gf_true, local->cont.lookup.buf.ia_type,
- reason, afr_post_gfid_sh_success,
- afr_self_heal_lookup_unwind);
- *sh_launched = _gf_true;
- }
-out:
- return;
-}
+ local = frame->local;
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count)
-{
- unsigned int i = 0;
- unsigned int j = 0;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- GF_ASSERT (success_children);
- GF_ASSERT (sources);
- GF_ASSERT (fresh_children);
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery (this, child_index);
- afr_reset_children (fresh_children, child_count);
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- success_children[i])) {
- fresh_children[j] = success_children[i];
- j++;
- }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_discover_done (frame, this);
}
+
+ return 0;
}
-static int
-afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+
+int
+afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
{
- afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- GF_ASSERT (read_child >= 0);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
- afr_get_fresh_children (local->cont.lookup.success_children,
- local->cont.lookup.sources,
- local->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
- local->fresh_children);
+ if (err) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
- return 0;
-}
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict)
-{
- int32_t read_child = -1;
- int32_t ret = -1;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (local->loc.parent == NULL)
- fail_conflict = _gf_true;
-
- if (afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name)) {
- if (fail_conflict == _gf_false)
- ret = 0;
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
}
- if (!afr_is_transaction_running (local)) {
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret)
- goto out;
-
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret)
- goto out;
- }
-
- ret = afr_lookup_build_response_params (local, this);
- if (ret)
- goto out;
- if (afr_is_fresh_lookup (&local->loc, this)) {
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_discover_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
}
- ret = 0;
+ return 0;
out:
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
- return ret;
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
+
+int
+afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- int unwind = 1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t sh_launched = _gf_false;
- gf_boolean_t fail_conflict = _gf_false;
- int gfid_miss_count = 0;
- int enotconn_count = 0;
- int up_children_count = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
- if (local->op_ret < 0)
- goto unwind;
- gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
- enotconn_count = priv->child_count - up_children_count;
- if ((gfid_miss_count == local->success_count) &&
- (enotconn_count > 0)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
- "LOOKUP on a file without gfid is not allowed when "
- "some of the children are down", local->loc.path);
- goto unwind;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- if ((gfid_miss_count == local->success_count) &&
- uuid_is_null (local->cont.lookup.gfid_req)) {
- local->op_ret = -1;
- local->op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
- local->loc.path);
- goto unwind;
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
}
- if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
- fail_conflict = _gf_true;
- ret = afr_lookup_done_success_action (frame, this, fail_conflict);
- if (ret)
- goto unwind;
- uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+ if (__is_root_gfid (loc->inode->gfid)) {
+ if (!this->itable)
+ this->itable = loc->inode->table;
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref (loc->inode);
- afr_lookup_perform_self_heal (frame, this, &sh_launched);
- if (sh_launched) {
- unwind = 0;
- goto unwind;
- }
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
- unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
+ local->op = GF_FOP_LOOKUP;
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
+ loc_copy (&local->loc, loc);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
-{
- gf_boolean_t ret = _gf_true;
+ local->inode = inode_ref (loc->inode);
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
+ if (uuid_is_null (loc->inode->gfid)) {
+ afr_discover_do (frame, this, 0);
+ return 0;
+ }
- return ret;
-}
+ afr_read_subvol_get (loc->inode, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count)
-{
- int i = 0;
- int32_t op_errno = 0;
- int child = 0;
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
+ else
+ afr_discover_do (frame, this, 0);
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
- }
- return op_errno;
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+
+int
+afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
{
- GF_ASSERT (local);
- if (op_errno == ENOENT)
- local->enoent_count++;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local = frame->local;
+ priv = this->private;
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-}
+ if (err < 0) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
-static void
-afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
- inode_t *inode)
-{
- afr_private_t *priv = NULL;
- GF_ASSERT (inode);
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
- if (!__is_root_gfid (inode->gfid))
- goto out;
- if (!afr_is_fresh_lookup (&local->loc, this))
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
- priv = this->private;
- if ((priv->first_lookup)) {
- gf_log (this->name, GF_LOG_INFO, "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
}
-out:
- return;
-}
-static void
-afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
- struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (child_index >= 0);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparents[child_index] = *postparent;
- local->cont.lookup.bufs[child_index] = *buf;
-}
-
-static void
-afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
- inode_t *inode, struct iatt *buf)
-{
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.buf = *buf;
- afr_set_root_inode_on_first_lookup (local, this, inode);
-}
-
-static void
-afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
- local->op_ret = op_ret;
- local->op_errno = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
}
- afr_lookup_handle_first_success (local, this, inode, buf);
}
- afr_lookup_update_lk_counts (local, this,
- child_index, xattr);
-
- afr_lookup_cache_args (local, child_index, xattr,
- buf, postparent);
- local->cont.lookup.success_children[local->success_count] = child_index;
- local->success_count++;
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element getting looked up is.
+ * i.e what is the GFID, inode type and a conservative estimate of the
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief goal of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
+
int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
- child_index = (long) cookie;
+ if (!loc->parent) {
+ afr_discover (frame, this, loc, xattr_req);
+ return 0;
+ }
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (__is_root_gfid (loc->parent->gfid)) {
+ if (!strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) {
+ op_errno = EPERM;
+ goto out;
+ }
+ }
- if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
- goto unlock;
- }
- afr_lookup_handle_success (local, this, child_index, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_done (frame, this);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
}
- return 0;
-}
-
-int
-afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
-{
- int ret = -ENOMEM;
- struct iatt *iatts = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
-
- GF_ASSERT (local);
- local->cont.lookup.xattrs = GF_CALLOC (child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
- if (NULL == local->cont.lookup.xattrs)
- goto out;
+ local->op = GF_FOP_LOOKUP;
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.postparents = iatts;
+ loc_copy (&local->loc, loc);
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.bufs = iatts;
+ local->inode = inode_ref (loc->inode);
- success_children = afr_children_create (child_count);
- if (NULL == success_children)
- goto out;
- local->cont.lookup.success_children = success_children;
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- local->fresh_children = afr_children_create (child_count);
- if (NULL == local->fresh_children)
- goto out;
+ afr_read_subvol_get (loc->parent, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
- sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
- local->cont.lookup.sources = sources;
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
+ else
+ afr_lookup_do (frame, this, 0);
- ret = 0;
+ return 0;
out:
- return ret;
-}
-
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
-
- priv = this->private;
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- local->op_ret = -1;
+ return 0;
+}
- frame->local = local;
- local->fop = GF_FOP_LOOKUP;
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
- goto out;
- }
+/* {{{ open */
- loc_copy (&local->loc, loc);
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ ret = __fd_ctx_get (fd, this, &ctx);
- local->read_child_index = afr_inode_get_read_ctx (this,
- loc->inode,
- NULL);
- } else {
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
- }
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
- local->child_up = memdup (priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- if (NULL == local->child_up) {
- op_errno = ENOMEM;
- goto out;
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
}
- ret = afr_lookup_cont_init (local, priv->child_count);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+out:
+ return fd_ctx;
+}
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- call_count = local->call_count;
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, loc,
- &gfid_req);
- if (ret) {
- local->op_errno = -ret;
- goto out;
- }
- afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
- loc);
- local->fop = GF_FOP_LOOKUP;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
}
+ UNLOCK(&fd->lock);
- ret = 0;
-out:
- if (ret)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
-
- return 0;
+ return fd_ctx;
}
-/* {{{ open */
-
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
afr_private_t * priv = NULL;
int ret = -1;
uint64_t ctx = 0;
afr_fd_ctx_t * fd_ctx = NULL;
+ int i = 0;
VALIDATE_OR_GOTO (this->private, out);
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
+ ret = __fd_ctx_get (fd, this, &ctx);
- if (ret == 0)
- goto unlock;
-
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto unlock;
- }
+ if (ret == 0)
+ goto out;
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ fd_ctx->pre_op_done[i] = GF_CALLOC (sizeof (*fd_ctx->pre_op_done[i]),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto unlock;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous (fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto unlock;
- }
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto unlock;
- }
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
- INIT_LIST_HEAD (&fd_ctx->entries);
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
- }
-unlock:
- UNLOCK (&fd->lock);
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
out:
return ret;
}
-/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ int ret = -1;
- LOCK (&frame->lock);
+ LOCK (&fd->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
+ ret = __afr_fd_ctx_set (this, fd);
}
+ UNLOCK (&fd->lock);
- return 0;
+ return ret;
}
+/* {{{ flush */
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ int call_count = -1;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, local->xdata_rsp);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd);
-
+ local->fd, xdata);
if (!--call_count)
break;
+
}
}
return 0;
}
-
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
+ local->fd = fd_ref(fd);
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_ret = -1;
- op_errno = -ret;
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
goto out;
- }
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ afr_delayed_changelog_wake_resume (this, fd, stub);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
-
+ AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2355,8 +2110,7 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
+ int i = 0;
ret = fd_ctx_get (fd, this, &ctx);
if (ret < 0)
@@ -2365,28 +2119,22 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ //no need to take any locks
+ if (!list_empty (&fd_ctx->eager_locked))
+ gf_log (this->name, GF_LOG_WARNING, "%s: Stale "
+ "Eager-lock stubs found",
+ uuid_utoa (fd->inode->gfid));
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++)
+ GF_FREE (fd_ctx->pre_op_done[i]);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ GF_FREE (fd_ctx->lock_acquired);
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
GF_FREE (fd_ctx);
}
@@ -2399,24 +2147,8 @@ out:
int
afr_release (xlator_t *this, fd_t *fd)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
afr_cleanup_fd_ctx (this, fd);
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
-
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
-
- }
-
return 0;
}
@@ -2424,51 +2156,87 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
int child_index = (long) cookie;
- int read_child = 0;
+ int read_subvol = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
LOCK (&frame->lock);
{
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
if (op_ret == 0) {
- local->op_ret = 0;
+ if (local->op_ret == -1) {
+ local->op_ret = 0;
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ if (child_index == read_subvol) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ }
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -2476,35 +2244,34 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->fd = fd_ref (fd);
- call_count = local->call_count;
- frame->local = local;
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
- local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -2512,17 +2279,16 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
@@ -2530,9 +2296,9 @@ out:
/* {{{ fsync */
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2541,10 +2307,13 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
local->op_ret = 0;
-
- local->op_errno = op_errno;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
@@ -2552,57 +2321,49 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, local->xdata_rsp);
return 0;
}
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+int
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fsyncdir_cbk,
priv->children[i],
priv->children[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+
return 0;
}
@@ -2613,7 +2374,7 @@ out:
int32_t
afr_xattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2622,8 +2383,15 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2633,7 +2401,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.xattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -2641,49 +2409,41 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_xattrop_cbk,
priv->children[i],
priv->children[i]->fops->xattrop,
- loc, optype, xattr);
+ loc, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -2694,7 +2454,7 @@ out:
int32_t
afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2704,8 +2464,14 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
local->op_ret = 0;
+ }
local->op_errno = op_errno;
}
@@ -2715,7 +2481,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
+ local->cont.fxattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -2723,49 +2489,41 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fxattrop_cbk,
priv->children[i],
priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
+ fd, optype, xattr, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -2773,8 +2531,8 @@ out:
int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2795,7 +2553,7 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
@@ -2803,57 +2561,50 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
- }
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOMEM;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_inodelk_cbk,
priv->children[i],
priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
return 0;
}
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -2874,66 +2625,57 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_finodelk_cbk,
priv->children[i],
priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
return 0;
}
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2953,67 +2695,59 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+int
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_entrylk_cbk,
priv->children[i],
priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3034,156 +2768,148 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, xdata);
return 0;
}
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
+int
+afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
- frame->local = local;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_fentrylk_cbk,
priv->children[i],
priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
+
+int
+afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
+ struct statvfs *buf = NULL;
LOCK (&frame->lock);
{
local = frame->local;
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
+ if (op_ret != 0) {
local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = op_ret;
+
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
+ &local->cont.statfs.buf, local->xdata_rsp);
return 0;
}
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+int
+afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
int i = 0;
- int ret = -1;
int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
+ int32_t op_errno = ENOMEM;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ priv = this->private;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- frame->local = local;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
priv->children[i]->fops->statfs,
- loc);
+ loc, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
int32_t
afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
afr_local_t * local = NULL;
int call_count = -1;
@@ -3193,7 +2919,7 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0)
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
+ lock, xdata);
return 0;
}
@@ -3215,7 +2941,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
if (call_count == 0) {
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
return 0;
}
@@ -3229,7 +2955,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
priv->children[i],
priv->children[i]->fops->lk,
local->fd, F_SETLK,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, NULL);
if (!--call_count)
break;
@@ -3242,7 +2968,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this)
int32_t
afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -3277,30 +3003,15 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index],
priv->children[child_index]->fops->lk,
local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock);
+ &local->cont.lk.user_flock, xdata);
} else if (local->op_ret == -1) {
/* all nodes have gone down */
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
} else {
- /* locking has succeeded on all nodes that are up */
-
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
-
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
-
- */
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock);
+ &local->cont.lk.ret_flock, NULL);
}
return 0;
@@ -3309,24 +3020,18 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
sizeof (*local->cont.lk.locked_nodes),
@@ -3345,32 +3050,18 @@ afr_lk (call_frame_t *frame, xlator_t *this,
STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
priv->children[i],
priv->children[i]->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
int
afr_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- inode_ctx_get (inode, this, &ctx_addr);
-
- if (!ctx_addr)
- goto out;
-
- ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-out:
return 0;
}
@@ -3390,7 +3081,6 @@ afr_priv_dump (xlator_t *this)
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
gf_proc_dump_add_section(key_prefix);
gf_proc_dump_write("child_count", "%u", priv->child_count);
- gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
for (i = 0; i < priv->child_count; i++) {
sprintf (key, "child_up[%d]", i);
gf_proc_dump_write(key, "%d", priv->child_up[i]);
@@ -3405,11 +3095,6 @@ afr_priv_dump (xlator_t *this)
gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
gf_proc_dump_write("read_child", "%d", priv->read_child);
gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
- gf_proc_dump_write("data_lock_server_count", "%u", priv->data_lock_server_count);
- gf_proc_dump_write("metadata_lock_server_count", "%u",
- priv->metadata_lock_server_count);
- gf_proc_dump_write("entry_lock_server_count", "%u",
- priv->entry_lock_server_count);
gf_proc_dump_write("wait_count", "%u", priv->wait_count);
return 0;
@@ -3440,7 +3125,7 @@ find_child_index (xlator_t *this, xlator_t *child)
int32_t
afr_notify (xlator_t *this, int32_t event,
- void *data, ...)
+ void *data, void *data2)
{
afr_private_t *priv = NULL;
int i = -1;
@@ -3452,13 +3137,23 @@ afr_notify (xlator_t *this, int32_t event,
int idx = -1;
int ret = -1;
int call_psh = 0;
- int up_child = AFR_ALL_CHILDREN;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
priv = this->private;
if (!priv)
return 0;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
had_heard_from_all = 1;
for (i = 0; i < priv->child_count; i++) {
if (!priv->last_event[i]) {
@@ -3486,9 +3181,20 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_UP:
LOCK (&priv->lock);
{
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ priv->event_generation++;
+ }
priv->child_up[idx] = 1;
- priv->up_count++;
+ call_psh = 1;
+ up_child = idx;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 1)
up_children++;
@@ -3498,12 +3204,6 @@ afr_notify (xlator_t *this, int32_t event,
"going online.", ((xlator_t *)data)->name);
} else {
event = GF_EVENT_CHILD_MODIFIED;
- gf_log (this->name, GF_LOG_INFO, "subvol %d came up, "
- "start crawl", idx);
- if (had_heard_from_all) {
- call_psh = 1;
- up_child = idx;
- }
}
priv->last_event[idx] = event;
@@ -3515,8 +3215,23 @@ afr_notify (xlator_t *this, int32_t event,
case GF_EVENT_CHILD_DOWN:
LOCK (&priv->lock);
{
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ priv->event_generation++;
+ }
priv->child_up[idx] = 0;
- priv->down_count++;
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
@@ -3544,10 +3259,15 @@ afr_notify (xlator_t *this, int32_t event,
break;
- case GF_EVENT_TRIGGER_HEAL:
- gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered"
- " manually. Start crawling");
- call_psh = 1;
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ if (!had_heard_from_all) {
+ ret = -1;
+ goto out;
+ }
+ ret = afr_xl_op (this, input, output);
+ goto out;
break;
default:
@@ -3576,8 +3296,7 @@ afr_notify (xlator_t *this, int32_t event,
LOCK (&priv->lock);
{
- up_children = afr_up_children_count (priv->child_up,
- priv->child_count);
+ up_children = AFR_COUNT (priv->child_up, priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
@@ -3592,65 +3311,75 @@ afr_notify (xlator_t *this, int32_t event,
}
}
UNLOCK (&priv->lock);
- if (up_children > 1) {
- gf_log (this->name, GF_LOG_INFO, "All subvolumes came "
- "up, start crawl");
- call_psh = 1;
- }
}
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh)
- afr_proactive_self_heal (this, up_child);
+ if (call_psh && priv->shd.iamshd) {
+ afr_selfheal_childup (this, up_child);
+ }
out:
return ret;
}
-int
-afr_first_up_child (unsigned char *child_up, size_t child_count)
-{
- int ret = -1;
- int i = 0;
-
- GF_ASSERT (child_up);
-
- for (i = 0; i < child_count; i++) {
- if (child_up[i]) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
{
local->op_ret = -1;
local->op_errno = EUCLEAN;
- local->call_count = afr_up_children_count (priv->child_up,
- priv->child_count);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- return -ENOTCONN;
- }
+ syncbarrier_init (&local->barrier);
- local->child_up = GF_CALLOC (sizeof (*local->child_up),
- priv->child_count,
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
- return -ENOMEM;
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
}
memcpy (local->child_up, priv->child_up,
sizeof (*local->child_up) * priv->child_count);
+ local->call_count = AFR_COUNT (local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+ local->event_generation = priv->event_generation;
- return 0;
+ local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ return 0;
+out:
+ return -1;
}
int
@@ -3659,16 +3388,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -3688,10 +3407,62 @@ out:
return ret;
}
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+ int i = 0;
+
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE (matrix[i]);
+ }
+
+ GF_FREE (matrix);
+out:
+ return;
+}
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
+ gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup (matrix, m);
+ return NULL;
+}
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
int
afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
- int i = 0;
int child_up_count = 0;
int ret = -ENOMEM;
afr_private_t *priv = NULL;
@@ -3702,20 +3473,20 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
- child_up_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ child_up_count = AFR_COUNT (local->child_up, priv->child_count);
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
- local->first_up_child = afr_first_up_child (local->child_up,
- priv->child_count);
-
- local->child_errno = GF_CALLOC (sizeof (*local->child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!local->child_errno)
- goto out;
+ local->pre_op_compat = priv->pre_op_compat;
local->transaction.eager_lock =
GF_CALLOC (sizeof (*local->transaction.eager_lock),
@@ -3725,199 +3496,130 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.eager_lock)
goto out;
- local->pending = GF_CALLOC (sizeof (*local->pending),
- priv->child_count,
- gf_afr_mt_int32_t);
-
- if (!local->pending)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
- }
-
local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
priv->child_count,
gf_afr_mt_char);
if (!local->transaction.pre_op)
goto out;
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
- 3, /* data + metadata + entry */
- gf_afr_mt_int32_t);
- if (!local->pending[i])
- goto out;
- }
+ local->transaction.fop_subvols = GF_CALLOC (sizeof (*local->transaction.fop_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.fop_subvols)
+ goto out;
- local->transaction.child_errno =
- GF_CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count,
- gf_afr_mt_int32_t);
- local->transaction.erase_pending = 1;
+ local->transaction.failed_subvols = GF_CALLOC (sizeof (*local->transaction.failed_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
out:
return ret;
}
+
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count)
+afr_set_low_priority (call_frame_t *frame)
{
- unsigned int i = 0;
- for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ frame->root->pid = LOW_PRIO_PROC_PID;
}
-int32_t*
-afr_children_create (int32_t child_count)
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
{
- int32_t *children = NULL;
- int i = 0;
+ unsigned int quorum = 0;
- GF_ASSERT (child_count > 0);
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
- children = GF_CALLOC (child_count, sizeof (*children),
- gf_afr_mt_int32_t);
- if (NULL == children)
- goto out;
- for (i = 0; i < child_count; i++)
- children[i] = -1;
-out:
- return children;
-}
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
-void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count)
-{
- gf_boolean_t child_found = _gf_false;
- int i = 0;
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- child_found = _gf_true;
- break;
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
}
}
- if (!child_found) {
- GF_ASSERT (i < child_count);
- children[i] = child;
- }
+out:
+ return _gf_false;
}
void
-afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+afr_priv_destroy (afr_private_t *priv)
{
- int i = 0;
+ int i = 0;
- GF_ASSERT ((child >= 0) && (child < child_count));
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- if (i != (child_count - 1))
- memmove (children + i, children + i + 1,
- sizeof (*children)*(child_count - i - 1));
- children[child_count - 1] = -1;
- break;
- }
- }
+ if (!priv)
+ goto out;
+ inode_unref (priv->root_inode);
+ GF_FREE (priv->last_event);
+ if (priv->pending_key) {
+ for (i = 0; i < priv->child_count; i++)
+ GF_FREE (priv->pending_key[i]);
+ }
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->children);
+ GF_FREE (priv->child_up);
+ LOCK_DESTROY (&priv->lock);
+
+ GF_FREE (priv);
+out:
+ return;
}
int
-afr_get_children_count (int32_t *children, unsigned int child_count)
+xlator_subvolume_count (xlator_t *this)
{
- int count = 0;
int i = 0;
+ xlator_list_t *list = NULL;
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- count++;
- }
- return count;
-}
-
-void
-afr_set_low_priority (call_frame_t *frame)
-{
- frame->root->pid = LOW_PRIO_PROC_PID;
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
}
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wbflags)
-{
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- GF_ASSERT (fd && fd->inode);
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
- goto out;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child] = AFR_FD_OPENED;
- if (!IA_ISDIR (fd->inode->ia_type)) {
- fd_ctx->flags = flags;
- fd_ctx->wbflags = wbflags;
- }
- ret = 0;
-out:
- return ret;
-}
-gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv)
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
{
- unsigned int quorum = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- GF_VALIDATE_OR_GOTO(logname,priv,out);
+ local = frame->local;
- quorum = priv->child_count / 2 + 1;
- if (priv->up_count >= (priv->down_count + quorum)) {
- return _gf_true;
- }
+ if (!local->fd)
+ return;
- /*
- * Special case for even numbers of nodes: if we have exactly half
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
- */
- if ((priv->child_count % 2) == 0) {
- quorum = priv->child_count / 2;
- if (priv->up_count >= (priv->down_count + quorum)) {
- if (priv->child_up[0]) {
- return _gf_true;
- }
- }
- }
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx)
+ return;
-out:
- return _gf_false;
+ fd_ctx->open_fd_count = local->open_fd_count;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f5a7b47a4..fa1da3958 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,701 +37,384 @@
#include "checksum.h"
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
- gf_boolean_t activate_check = _gf_false;
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
- if (_gf_false == activate_check) {
- cksum = checksum[i];
- activate_check = _gf_true;
- continue;
- }
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
-
-int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- char *reason = NULL;
- int child_index = 0;
- uint32_t entry_cksum = 0;
- int call_count = 0;
- off_t last_offset = 0;
- inode_t *inode = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do opendir on %s",
- local->loc.path, priv->children[child_index]->name);
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: no entries found in %s",
- local->loc.path, priv->children[child_index]->name);
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- }
-
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
-
- return 0;
-
-out:
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->do_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
-
- reason = "checksums of directory differ";
- afr_launch_self_heal (frame, this, inode, _gf_false,
- inode->ia_type, reason, NULL,
- afr_examine_dir_sh_unwind);
- } else {
- afr_set_opendir_done (this, inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+#include "afr-transaction.h"
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
int call_count = -1;
int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
child_index = (long) cookie;
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
LOCK (&frame->lock);
{
- if (op_ret >= 0) {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- 0, 0);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
-
- local->op_errno = op_errno;
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (local->op_ret != 0)
- goto out;
-
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anomalies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- /* do the unwind */
- goto out;
- }
- }
-
- return 0;
-
-out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
-
+ if (call_count == 0)
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
return 0;
}
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+int
+afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- int child_count = 0;
int i = 0;
- int ret = -1;
int call_count = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
- child_count = priv->child_count;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
loc_copy (&local->loc, loc);
- frame->local = local;
local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
call_count = local->call_count;
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_opendir_cbk,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- loc, fd);
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
-
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
-struct entry_name {
- char *name;
- struct list_head list;
-};
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static uint64_t
+afr_bits_for (uint64_t num)
{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
+ uint64_t bits = 0, ctrl = 1;
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
-out:
- return ret;
+ return bits;
}
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+int
+afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p)
{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
+ conf = this->private;
+ if (!conf)
+ goto out;
+ max = conf->child_count;
+ cnt = subvol;
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ max_bits = afr_bits_for (max);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
}
- return offset;
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
}
-static void
-afr_forget_entries (fd_t *fd)
+int
+afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p,
+ uint64_t *x_p)
{
- struct entry_name *entry = NULL;
- struct entry_name *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
-}
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ int subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ if (!this->private)
+ return -1;
+ conf = this->private;
+ max = conf->child_count;
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_local_t * local = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
- local = frame->local;
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = afr_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
- if (op_ret == -1)
- goto out;
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
}
out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
+ subvol = cnt;
+
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ if (x_p)
+ *x_p = x;
return 0;
}
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+static void
+afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
+ gf_dirent_t *entries, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
+ afr_private_t *priv = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int gen = 0;
+
+ priv = THIS->private;
+
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ continue;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init (&entry->list);
+ afr_itransform (THIS, subvol, entry->d_off, &entry->d_off);
+ list_add_tail (&entry->list, &entries->list);
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
+ if (entry->inode) {
+ gen = 0;
+ afr_inode_read_subvol_get (entry->inode, THIS,
+ data_readable,
+ metadata_readable, &gen);
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
+ if (gen != priv->event_generation ||
+ !data_readable[subvol] ||
+ !metadata_readable[subvol]) {
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that will make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
+ inode_unref (entry->inode);
+ entry->inode = NULL;
+ }
+ }
}
-
-out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
- return 0;
}
+
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ INIT_LIST_HEAD (&entries.list);
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = frame->local;
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ if (op_ret >= 0)
+ afr_readdir_transform_entries (subvol_entries, (long) cookie,
+ &entries, local->fd);
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata);
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
+ return 0;
+}
- fd_ctx->last_tried = call_child;
- }
- if (whichop == GF_FOP_READDIR)
+int
+afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, 0, 0);
+ return 0;
+ }
+
+ if (local->op == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset);
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset);
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
+}
- op_ret = 0;
+
+int
+afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = whichop;
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict)? dict_ref (dict) : NULL;
+
+ if (offset == 0) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_deitransform (this, offset, &subvol,
+ (uint64_t *)&local->cont.readdir.offset);
+ afr_readdir_wind (frame, this, subvol);
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+
return 0;
}
@@ -748,7 +422,6 @@ afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int32_t
afr_releasedir (xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
afr_cleanup_fd_ctx (this, fd);
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6a6bc6354..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,23 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 11df550d5..465dde54f 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -43,70 +34,218 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "byte-order.h"
#include "afr.h"
#include "afr-transaction.h"
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this);
+
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
- char *tmp = NULL;
+ int ret = -1;
+ char *child_path = NULL;
if (!child->parent) {
- //this should never be called with root as the child
- GF_ASSERT (0);
- loc_copy (parent, child);
- return;
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- tmp = gf_strdup (child->path);
- parent->path = gf_strdup (dirname (tmp));
- GF_FREE (tmp);
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ parent->path = gf_strdup (dirname (child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
- if (!uuid_is_null (child->pargfid))
- uuid_copy (parent->gfid, child->pargfid);
+ ret = 0;
+out:
+ GF_FREE (child_path);
+
+ return ret;
}
-/* {{{ create */
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ afr_replies_interpret (frame, this, local->inode);
+ inode_read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get (local->parent, this,
+ NULL, NULL);
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
+ NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_read_subvol_reset (local->inode,
+ this);
+ if (local->parent)
+ afr_inode_read_subvol_reset (local->parent,
+ this);
+ if (local->parent2)
+ afr_inode_read_subvol_reset (local->parent2,
+ this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ if (local->replies[i].xdata)
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ }
+ }
+}
+
+
+static void
+__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
+}
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent);
+
+static int
+__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ __afr_dir_write_fill (frame, this, child_index, op_ret,
+ op_errno, buf, preparent, postparent,
+ preparent2, postparent2, xdata);
+ }
+ UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_dir_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
+ afr_mark_entry_pending_changelog (frame, this);
+
+ local->transaction.resume (frame, this);
}
return 0;
@@ -114,223 +253,269 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
+ int call_count = 0;
- local = frame->local;
- priv = this->private;
+ call_count = afr_frame_return (frame);
- child_index = (long) cookie;
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ return 0;
+}
- if (op_ret != -1) {
- local->op_ret = op_ret;
- ret = afr_fd_ctx_set (this, fd);
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int idx = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
+ local = frame->local;
+ priv = this->private;
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
- ret = fd_ctx_get (fd, this, &ctx);
+ new_local = AFR_FRAME_INIT (new_frame, op_errno);
+ if (!new_local)
+ goto out;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ xattr = dict_new ();
+ if (!xattr)
+ goto out;
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- fd_ctx->flags = local->cont.create.flags;
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
+ pending = alloca0 (priv->child_count);
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count ++;
+ continue;
+ }
- local->cont.create.inode = inode;
+ changelog[i][idx] = hton32(1);
+ pending[i] = 1;
+ }
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->inode);
- local->op_errno = op_errno;
- }
-unlock:
- UNLOCK (&frame->lock);
+ afr_set_pending_dict (priv, xattr, changelog);
- call_count = afr_frame_return (frame);
+ new_local->call_count = call_count;
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
- local->transaction.resume (frame, this);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr, NULL);
+ if (!--call_count)
+ break;
}
- return 0;
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ if (xattr)
+ dict_unref (xattr);
+ return;
}
-int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+void
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ int pre_op_count = 0;
+ int failed_count = 0;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ if (local->op_ret < 0)
+ return;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD)
+ return;
- local->call_count = call_count;
+ pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT (local->transaction.failed_subvols,
+ priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd,
- local->cont.create.params);
- if (!--call_count)
- break;
- }
- }
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- return 0;
+ afr_mark_new_entry_changelog (frame, this);
+
+ return;
}
+/* {{{ create */
+
int
-afr_create_done (call_frame_t *frame, xlator_t *this)
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
local = frame->local;
- local->transaction.unwind (frame, this);
+ main_frame = afr_transaction_detach_fop_frame (frame);
- AFR_STACK_DESTROY (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int
+afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
priv = this->private;
- QUORUM_CHECK(create,out);
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create,
+ &local->loc, local->cont.create.flags,
+ local->cont.create.mode, local->umask,
+ local->cont.create.fd, local->xdata_req);
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+int
+afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ priv = this->private;
+
+ QUORUM_CHECK(create,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
- if (params)
- local->cont.create.params = dict_ref (params);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_create_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -343,33 +528,17 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -378,183 +547,107 @@ int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->cont.mknod.params);
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev, local->umask,
+ local->xdata_req);
return 0;
}
-
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(mknod,out);
+ QUORUM_CHECK(mknod,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
- if (params)
- local->cont.mknod.params = dict_ref (params);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mknod_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -568,33 +661,17 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -603,184 +680,106 @@ int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->cont.mkdir.params);
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask,
+ local->xdata_req);
return 0;
}
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(mkdir,out);
+ QUORUM_CHECK(mkdir,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.mkdir.mode = mode;
- if (params)
- local->cont.mkdir.params = dict_ref (params);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mkdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -794,33 +793,17 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -829,180 +812,105 @@ int
afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(link,out);
+ QUORUM_CHECK(link,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ if (!transaction_frame)
goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (newloc->parent);
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_LINK;
+
+ local->transaction.wind = afr_link_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_link_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1016,33 +924,17 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1051,184 +943,106 @@ int
afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->cont.symlink.params);
-
- if (!--call_count)
- break;
-
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc,
+ local->umask, local->xdata_req);
return 0;
}
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(symlink,out);
+ QUORUM_CHECK(symlink,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.symlink.linkpath = gf_strdup (linkpath);
- if (params)
- local->cont.symlink.params = dict_ref (params);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_symlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1241,35 +1055,19 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
return 0;
}
@@ -1278,170 +1076,135 @@ int
afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
priv = this->private;
- QUORUM_CHECK(rename,out);
+ QUORUM_CHECK(rename,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (!transaction_frame)
+ op_errno = ENOMEM;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (oldloc->parent);
+ local->parent2 = inode_ref (newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rename_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- AFR_STACK_UNWIND (rename, frame, op_ret, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ nlockee++;
}
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1457,22 +1220,13 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1480,161 +1234,103 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
int
afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink,
+ &local->loc, local->xflag, local->xdata_req);
return 0;
}
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+int
+afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(unlink,out);
+ QUORUM_CHECK(unlink,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
- transaction_frame->local = local;
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
- loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_unlink_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, op_ret, op_errno,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1652,22 +1348,13 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1675,163 +1362,117 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
int
afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags, local->xdata_req);
return 0;
}
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ int nlockee = 0;
priv = this->private;
- QUORUM_CHECK(rmdir,out);
+ QUORUM_CHECK(rmdir,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.rmdir.flags = flags;
- loc_copy (&local->loc, loc);
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rmdir_unwind;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
- NULL, NULL);
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 0290c6350..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -23,38 +14,34 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, dict_t *params);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 82a9d27c0..01e078c13 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,242 +35,153 @@
#include "compat-errno.h"
#include "compat.h"
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
+#include "afr-transaction.h"
+
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.access.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+int
+afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
-
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (access, frame, local->op_ret,
+ local->op_errno, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access,
+ &local->loc, local->cont.access.mask,
+ local->xdata_req);
+ return 0;
+}
+int
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int mask, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.access.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ local->op = GF_FOP_ACCESS;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->access,
- loc, mask);
+ afr_read_txn (frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno);
- }
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
return 0;
}
-
/* }}} */
/* {{{ stat */
-int32_t
+int
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- read_child = (long) cookie;
+ afr_local_t *local = NULL;
local = frame->local;
- if (op_ret == -1) {
- last_index = &local->cont.stat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- unwind = 0;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->stat,
- &local->loc);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
- }
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+int
+afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->stat,
+ &local->loc, local->xdata_req);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+int
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- children = priv->children;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ local->op = GF_FOP_STAT;
+ loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_read_txn (frame, this, loc->inode, afr_stat_wind,
+ AFR_DATA_TRANSACTION);
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.stat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- loc_copy (&local->loc, loc);
-
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -289,126 +191,76 @@ out:
/* {{{ fstat */
-int32_t
+int
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.fstat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fstat,
- local->fd);
- }
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
- }
- return 0;
+int
+afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fstat,
+ local->fd, local->xdata_req);
+ return 0;
}
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = 0;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- VALIDATE_OR_GOTO (fd->inode, out);
+ afr_fix_open (fd, this);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ afr_read_txn (frame, this, fd->inode, afr_fstat_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
-
-
-
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.fstat.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- local->fd = fd_ref (fd);
-
- op_ret = afr_open_fd_fix (frame, this, _gf_false);
- if (op_ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd);
-
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -417,117 +269,77 @@ out:
/* {{{ readlink */
-int32_t
+int
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf)
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.readlink.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readlink,
- &local->loc,
- local->cont.readlink.size);
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf);
- }
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno,
+ buf, sbuf, xdata);
+ return 0;
+}
- return 0;
+int
+afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readlink, frame, local->op_ret,
+ local->op_errno, 0, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink,
+ &local->loc, local->cont.readlink.size,
+ local->xdata_req);
+ return 0;
}
-int32_t
+int
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
+ afr_local_t * local = NULL;
int32_t op_errno = 0;
- int32_t read_child = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readlink.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_READLINK;
loc_copy (&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->cont.readlink.size = size;
+ afr_read_txn (frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readlink,
- loc, size);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, NULL, NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+
+ return 0;
}
@@ -541,7 +353,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -553,18 +365,19 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
void
-__filter_xattrs (dict_t *dict)
+afr_filter_xattrs (dict_t *dict)
{
struct list_head keys = {0,};
struct _xattr_key *key = NULL;
@@ -585,85 +398,669 @@ __filter_xattrs (dict_t *dict)
}
-
-int32_t
+int
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
+
+ if (dict)
+ afr_filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+
+int
+afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr,
+ &local->loc, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
+}
+
+
+int32_t
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
priv = this->private;
children = priv->children;
local = frame->local;
+ cky = (long) cookie;
- read_child = (long) cookie;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ op_errno = afr_final_errno (local, priv);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ op_errno = afr_final_errno (local, priv);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/**
+ * node-uuid cbk uses next child querying mechanism
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all childs and yet no success; give up !
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
&local->loc,
- local->cont.getxattr.name);
+ local->cont.getxattr.name,
+ NULL);
}
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
}
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
-
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
return 0;
}
int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+int32_t
afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- char pathinfo_cky[1024] = {0,};
- dict_t *xattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
goto out;
}
@@ -674,6 +1071,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
if (!dict || (op_ret < 0))
goto out;
@@ -681,142 +1086,341 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
local->dict = dict_new ();
if (local->dict) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
if (ret)
goto out;
- pathinfo = gf_strdup (pathinfo);
+ xattr = gf_strdup (xattr);
- snprintf (pathinfo_cky, 1024, "%s-%ld", GF_XATTR_PATHINFO_KEY, cky);
- ret = dict_set_dynstr (local->dict, pathinfo_cky, pathinfo);
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo cookie key");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
goto out;
}
- local->cont.getxattr.pathinfo_len += strlen (pathinfo) + 1;
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
}
}
out:
UNLOCK (&frame->lock);
if (!callcnt) {
- if (!local->cont.getxattr.pathinfo_len)
+ if (!local->cont.getxattr.xattr_len)
goto unwind;
- xattr = dict_new ();
- if (!xattr)
+ nxattr = dict_new ();
+ if (!nxattr)
goto unwind;
/* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.pathinfo_len += (padding + 2);
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- pathinfo_serz = GF_CALLOC (local->cont.getxattr.pathinfo_len, sizeof (char),
- gf_common_mt_char);
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
- if (!pathinfo_serz)
+ if (!xattr_serz)
goto unwind;
/* the xlator info */
- sprintf (pathinfo_serz, "(<"AFR_PATHINFO_HEADER"%s> ", this->name);
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
/* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict, pathinfo_serz + strlen (pathinfo_serz),
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
&tlen, ' ');
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing dictionary");
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
goto unwind;
}
/* closing part */
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr (xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
-
- if (local->dict)
- dict_unref (local->dict);
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
- if (xattr)
- dict_unref (xattr);
+ if (nxattr)
+ dict_unref (nxattr);
}
return ret;
}
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_max_stime (THIS, data, key, value);
+
+ return ret;
+}
+
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
+
+
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT (cbk);
+ if (!cbk || !name) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
+
+out:
+ return is_spl;
+}
+
+static void
+afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->getxattr,
+ loc, name, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
+ return;
+}
+
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
+
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ local->op = GF_FOP_GETXATTR;
+
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
goto out;
}
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ local->marker.call_count = priv->child_count;
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
+ "%s: failed to get marker attr (%s)",
loc->path, name);
- op_errno = ENODATA;
+ op_errno = EINVAL;
goto out;
}
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all childs
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_all_subvols (this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
*(sub_volumes + i) = trav->xlator;
+
}
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
sub_volumes,
priv->child_count,
- MARKER_UUID_TYPE,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
-
gf_log (this->name, GF_LOG_INFO,
"%s: failed to get marker attr (%s)",
loc->path, name);
@@ -826,86 +1430,150 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ }
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0) {
-
- local->call_count = priv->child_count;
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_getxattr_pathinfo_cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name);
- }
+no_name:
- return 0;
- }
+ afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- local->marker.call_count = priv->child_count;
+/* {{{ fgetxattr */
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
- *(sub_volumes + i) = trav->xlator;
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- }
+ local = frame->local;
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- return 0;
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
+
+ if (dict)
+ afr_filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+int
+afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, (void *) (long) subvol, afr_fgetxattr_cbk,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
+}
+
+
+static void
+afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ NULL);
+ if (!--call_count)
+ break;
}
}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ return;
+}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
+
+int
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref (fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup (name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols (this, frame, cbk);
+ return 0;
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->getxattr,
- loc, name);
+ afr_fix_open (fd, this);
+
+ afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -914,145 +1582,84 @@ out:
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
-
-int32_t
+int
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t *fresh_children = NULL;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ local = frame->local;
- children = priv->children;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- local = frame->local;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readv.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset);
- }
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
- }
- return 0;
+int
+afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readv, frame, local->op_ret, local->op_errno,
+ 0, 0, 0, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags,
+ local->xdata_req);
+ return 0;
}
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+int
+afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
- priv = this->private;
- children = priv->children;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readv.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- local->fd = fd_ref (fd);
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref (fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
+ afr_fix_open (fd, this);
- op_ret = afr_open_fd_fix (frame, this, _gf_false);
- if (op_ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset);
+ afr_read_txn (frame, this, fd->inode, afr_readv_wind,
+ AFR_DATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 5479cfbd5..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 4135ba947..3dacfc8dd 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,142 +37,303 @@
#include "afr.h"
#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
+//#include "afr-self-heal-common.h"
-/* {{{ writev */
-int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ if (local->transaction.type == AFR_METADATA_TRANSACTION)
+ read_subvol = afr_metadata_subvol_get (local->inode, this,
+ NULL, NULL);
+ else
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ afr_inode_read_subvol_reset (local->inode, this);
+ continue;
+ }
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precendence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+ }
+}
+
+
+static void
+__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ } else {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ return;
+}
+
+
+static int
+__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
}
UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_inode_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
+ local->transaction.resume (frame, this);
}
+
return 0;
}
+/* {{{ writev */
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int read_child = 0;
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp);
+}
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
local = frame->local;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
+ fop_frame = afr_transaction_detach_fop_frame (frame);
- local->op_errno = op_errno;
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
}
- UNLOCK (&frame->lock);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
- local->transaction.resume (frame, this);
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
}
- return 0;
}
int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
+ LOCK (&frame->lock);
+ {
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if (ret == -1)
+ goto unlock;
+ if ((open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
}
+unlock:
+ UNLOCK (&frame->lock);
- local->call_count = call_count;
+ call_count = afr_frame_return (frame);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.iobref);
-
- if (!--call_count)
- break;
+ if (call_count == 0) {
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
+
+ afr_writev_handle_short_writes (frame, this);
+
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!afr_txn_nothing_failed (frame, this)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
}
}
-
return 0;
}
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev,
+ local->fd, local->cont.writev.vector,
+ local->cont.writev.count, local->cont.writev.offset,
+ local->cont.writev.flags, local->cont.writev.iobref,
+ local->xdata_req);
return 0;
}
@@ -191,30 +343,37 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
{
call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
+ int ret = -1;
+ int op_errno = ENOMEM;
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
+ local = frame->local;
transaction_frame->local = local;
- frame->local = NULL;
+ frame->local = NULL;
- local->op = GF_FOP_WRITE;
+ if (!AFR_FRAME_INIT (frame, op_errno))
+ goto out;
- local->success_count = 0;
+ local->op = GF_FOP_WRITE;
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
+
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency gurantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -223,1506 +382,1373 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
+
+int
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
- if ((!fd) || (!fd->inode))
- return -1;
+ priv = this->private;
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
+ QUORUM_CHECK(writev,out);
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
+ local->cont.writev.vector = iov_dup (vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref (iobref);
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
+ if (!local->xdata_req)
+ goto out;
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- return 0;
-}
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local = frame->local;
- GF_ASSERT (local->fop_call_continue);
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- return paused_call;
-}
+ afr_fix_open (fd, this);
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
+ afr_do_writev (frame, this);
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
+ return 0;
+out:
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return ret;
+ return 0;
}
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+/* }}} */
- reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-}
+/* {{{ truncate */
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
+afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
- GF_ASSERT (local->fd);
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
- goto out;
- }
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
- goto unlock;
- }
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->up_count < priv->up_count) {
- need_self_heal = _gf_true;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
- }
+int
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
+ local = frame->local;
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
- }
- }
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
- goto out;
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
- if (!need_open_count)
- goto out;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
-out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+int
+afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate,
+ &local->loc, local->cont.truncate.offset,
+ local->xdata_req);
+ return 0;
}
+
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(writev,out);
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(truncate,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
goto out;
- }
- frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.iobref = iobref_ref (iobref);
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_truncate_unwind;
+
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_TRUNCATE;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
+
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* }}} */
-/* {{{ truncate */
+/* {{{ ftruncate */
+
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf);
- }
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
+
+
+int
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ local = frame->local;
+
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
+
+
+int
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate,
+ local->fd, local->cont.ftruncate.offset,
+ local->xdata_req);
return 0;
}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+ QUORUM_CHECK(ftruncate,out);
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ transaction_frame = copy_frame (frame);
+ if (!frame)
+ goto out;
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
+ if (!local->xdata_req)
+ goto out;
- local->success_count++;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->op = GF_FOP_FTRUNCATE;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_ftruncate_unwind;
- call_count = afr_frame_return (frame);
+ local->transaction.main_frame = frame;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
+
+ afr_fix_open (fd, this);
+
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
+/* }}} */
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
+/* {{{ setattr */
+
+int
+afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- local->call_count = call_count;
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset);
-
- if (!--call_count)
- break;
- }
- }
- return 0;
+int
+afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
}
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr,
+ &local->loc, &local->cont.setattr.in_buf,
+ local->cont.setattr.valid, local->xdata_req);
return 0;
}
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(truncate,out);
+ QUORUM_CHECK(setattr,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->op_ret = -1;
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.truncate.offset = offset;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_SETATTR;
local->transaction.main_frame = frame;
- local->transaction.start = offset;
+ local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-
-/* }}} */
-
-/* {{{ ftruncate */
-
+/* {{{ fsetattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf);
- }
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
+}
- if (need_unwind)
- local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+int
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr,
+ local->fd, &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid, local->xdata_req);
return 0;
}
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ QUORUM_CHECK(fsetattr,out);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- local->call_count = call_count;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
-
- if (!--call_count)
- break;
- }
- }
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetattr_unwind;
-int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local = frame->local;
+ local->op = GF_FOP_FSETATTR;
+
+ afr_fix_open (fd, this);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->transaction.unwind (frame, this);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+/* {{{ setxattr */
+
+
int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- local->op = GF_FOP_FTRUNCATE;
+ AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.main_frame = frame;
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
- local->transaction.start = local->cont.ftruncate.offset;
- local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr,
+ &local->loc, local->cont.setxattr.dict,
+ local->cont.setxattr.flags, local->xdata_req);
return 0;
}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
priv = this->private;
- QUORUM_CHECK(ftruncate,out);
+ QUORUM_CHECK(setxattr,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
- }
- frame->local = local;
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.ftruncate.offset = offset;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_setxattr_unwind;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ local->op = GF_FOP_SETXATTR;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
-/* }}} */
+/* {{{ fsetxattr */
-/* {{{ setattr */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
+
+
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr,
+ local->fd, local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags, local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- local->success_count++;
+ priv = this->private;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ QUORUM_CHECK(fsetxattr,out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FSETXATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
+/* }}} */
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
- local = frame->local;
- priv = this->private;
+/* {{{ removexattr */
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+int
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local->call_count = call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid);
-
- if (!--call_count)
- break;
- }
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
- local = frame->local;
- local->transaction.unwind (frame, this);
+int
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr,
+ &local->loc, local->cont.removexattr.name,
+ local->xdata_req);
return 0;
}
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid)
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
priv = this->private;
- QUORUM_CHECK(setattr,out);
+ QUORUM_CHECK(removexattr,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->cont.removexattr.name = gf_strdup (name);
- transaction_frame->local = local;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->op_ret = -1;
+ if (!local->xdata_req)
+ goto out;
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
-
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
- local->transaction.unwind = afr_setattr_unwind;
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_removexattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_REMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, op_ret, op_errno, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
-/* {{{ fsetattr */
-
+/* ffremovexattr */
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
+}
+
+
+int
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
- priv = this->private;
+ priv = this->private;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr,
+ local->fd, local->cont.removexattr.name,
+ local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+int
+afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- local->success_count++;
+ priv = this->private;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ QUORUM_CHECK(fremovexattr, out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->cont.removexattr.name = gf_strdup (name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FREMOVEXATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- local->call_count = call_count;
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid);
-
- if (!--call_count)
- break;
- }
- }
- return 0;
+int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate,
+ local->fd, local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
return 0;
}
+
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid)
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
- priv = this->private;
+ priv = this->private;
- QUORUM_CHECK(fsetattr,out);
+ QUORUM_CHECK(fallocate,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
- transaction_frame->local = local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- local->op_ret = -1;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
+ local->op = GF_FOP_FALLOCATE;
- op_ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fallocate_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
+
+ local->transaction.start = local->cont.fallocate.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/* {{{ setxattr */
+/* }}} */
+/* {{{ discard */
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard,
+ local->fd, local->cont.discard.offset,
+ local->cont.discard.len, local->xdata_req);
return 0;
}
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(setxattr,out);
+ QUORUM_CHECK(discard, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- transaction_frame->local = local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->op_ret = -1;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ local->op = GF_FOP_DISCARD;
- loc_copy (&local->loc, loc);
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_discard_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
+
+ local->transaction.start = local->cont.discard.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/* }}} */
-
-/* {{{ removexattr */
+/* {{{ zerofill */
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno)
- }
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill,
+ local->fd, local->cont.zerofill.offset,
+ local->cont.zerofill.len, local->xdata_req);
return 0;
}
-
int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ int op_errno = ENOMEM;
priv = this->private;
- QUORUM_CHECK(removexattr,out);
+ QUORUM_CHECK(discard, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- transaction_frame->local = local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- local->op_ret = -1;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->cont.removexattr.name = gf_strdup (name);
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->op = GF_FOP_ZEROFILL;
- loc_copy (&local->loc, loc);
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_zerofill_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = len;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, op_ret, op_errno);
+ afr_fix_open (fd, this);
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index f9aa7bd36..7b1fc5528 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,51 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
int
afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid);
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid);
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 82b9427ca..a2a758f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
@@ -31,8 +22,69 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_out (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_out (frame, this, params); \
+ } while (0);
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
static uint64_t afr_lock_number = 1;
@@ -57,12 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this)
}
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
{
gf_log (this->name, GF_LOG_TRACE,
"Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)frame->root);
- frame->root->lk_owner = (uint64_t) (unsigned long)frame->root;
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
}
static int
@@ -98,16 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -115,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
static void
afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -163,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd,
}
snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -183,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
void
afr_print_entrylk (char *str, int size, const char *basename,
- uint64_t owner)
+ gf_lkowner_t *owner)
{
- snprintf (str, size, "Basename=%s, lk-owner=%llu",
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
basename ? basename : "<nul>",
- (unsigned long long)owner);
+ lkowner_utoa (owner));
}
static void
@@ -241,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int op_ret, int op_errno, int32_t child_index)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
@@ -270,39 +309,31 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lockee,
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
(unsigned long long) int_lock->lock_number);
}
static void
-afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
int32_t cmd, int32_t child_index)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
-
- if (!priv->inodelk_trace) {
- return;
- }
- afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner);
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
@@ -317,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
}
static void
-afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
- xlator_t *this = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -338,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_entrylk (lock, 256, basename, frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
-afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename, int op_ret,
- int op_errno, int32_t child_index)
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
{
- xlator_t *this = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
char lock_call_type_str[256];
char verdict[16];
- this = THIS;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
@@ -375,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type,
if (!priv->entrylk_trace) {
return;
}
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -441,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -458,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -471,37 +559,37 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
{
- int ret = 0;
-
- ret = strcmp (l1->path, l2->path);
+ int call_count = 0;
+ int i = 0;
- if (ret == 0)
- ret = strcmp (b1, b2);
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
- if (ret <= 0)
- return l1;
- else
- return l2;
+ return call_count;
}
int
@@ -522,7 +610,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
/* FIXME: What if UNLOCK fails */
static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -548,33 +636,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
op_errno, child_index);
+ priv = this->private;
+
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed on %d, reason: %s",
- local->loc.path, child_index, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
}
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
-
- if (op_ret == 1) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
- }
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
@@ -584,9 +676,12 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
int call_count = 0;
int i = 0;
int piggyback = 0;
@@ -597,15 +692,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = F_UNLCK;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data unlock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -621,11 +715,11 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
fd_ctx = afr_fd_ctx_get (local->fd, this);
for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
continue;
if (local->fd) {
+ flock_use = &flock;
if (!local->transaction.eager_lock[i]) {
goto wind;
}
@@ -637,43 +731,48 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
if (fd_ctx->lock_piggyback[i]) {
fd_ctx->lock_piggyback[i]--;
piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
}
}
UNLOCK (&local->fd->lock);
if (piggyback) {
afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
- fd_ctx->lock_acquired[i]--;
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -685,24 +784,34 @@ out:
static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
local = frame->local;
+ int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"%s: unlock failed on %d, reason: %s",
local->loc.path, child_index, strerror (op_errno));
}
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
}
@@ -710,24 +819,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -737,18 +844,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
break;
@@ -762,15 +874,22 @@ out:
static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
LOCK (&frame->lock);
{
if (op_ret == -1) {
@@ -786,6 +905,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -794,10 +915,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -805,98 +933,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int32_t
afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long)cookie);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
return 0;
}
@@ -904,6 +960,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -914,18 +971,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -933,25 +988,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
struct gf_flock flock = {0,};
uint64_t ctx = 0;
int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -970,42 +1067,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -1018,12 +1099,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
if (local->fd) {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1031,11 +1118,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
} else {
- afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLKW,
child_index);
@@ -1043,63 +1131,44 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION,
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
AFR_LOCK_OP, local->transaction.basename,
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
break;
@@ -1127,11 +1196,11 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
case AFR_ENTRY_TRANSACTION:
+ up_count = AFR_COUNT (local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1143,42 +1212,55 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
static int32_t
afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
- afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ call_count = --int_lock->lk_call_count;
}
+ UNLOCK (&frame->lock);
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
@@ -1205,42 +1287,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
-{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
@@ -1253,11 +1319,11 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1270,42 +1336,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1315,70 +1391,69 @@ out:
int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
afr_fd_ctx_t *fd_ctx = NULL;
- afr_private_t *priv = NULL;
- priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (priv->eager_lock && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- local->transaction.eager_lock[child_index] = 1;
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
-
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
/* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1402,30 +1477,29 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = &flock;
- int piggyback = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- gf_log (this->name, GF_LOG_DEBUG, "attempting data lock range %"PRIu64
- " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
- frame->root->lk_owner);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
- full_flock.l_type = int_lock->lk_flock.l_type;
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
@@ -1441,11 +1515,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1460,14 +1534,18 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
+ if (!local->child_up[i])
continue;
- if (!priv->eager_lock)
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
goto wind;
+ }
- flock_use = &full_flock;
piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
LOCK (&local->fd->lock);
{
@@ -1481,21 +1559,23 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
if (piggyback) {
/* (op_ret == 1) => indicate piggybacked lock */
afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0);
+ this, 1, 0, NULL);
if (!--call_count)
break;
continue;
}
+ flock_use = &full_flock;
wind:
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, flock_use, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use);
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
if (!--call_count)
break;
@@ -1508,15 +1588,16 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!local->child_up[i])
continue;
- afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLK, i);
STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
if (!--call_count)
break;
@@ -1526,200 +1607,6 @@ out:
return ret;
}
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- if (!--call_count)
- break;
-
- }
- }
-
-out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
-}
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -1731,10 +1618,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -1746,490 +1631,37 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
}
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes)
-{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- priv = this->private;
-
- ret = afr_fd_ctx_set (this, fd);
- if (ret)
- goto out;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to get the fd ctx");
- goto out;
- }
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- GF_ASSERT (fdctx->locked_on);
-
- memcpy (fdctx->locked_on, locked_nodes,
- priv->child_count);
-
-out:
- return ret;
-}
-
-static int
-__is_fd_saved (xlator_t *this, fd_t *fd)
-{
- afr_locked_fd_t *locked_fd = NULL;
- afr_private_t *priv = NULL;
- int found = 0;
-
- priv = this->private;
-
- list_for_each_entry (locked_fd, &priv->saved_fds, list) {
- if (locked_fd->fd == fd) {
- found = 1;
- break;
- }
- }
-
- return found;
-}
-
-static int
-__afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- int ret = 0;
-
- priv = this->private;
-
- locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
- gf_afr_mt_locked_fd);
- if (!locked_fd) {
- ret = -1;
- goto out;
- }
-
- locked_fd->fd = fd;
- INIT_LIST_HEAD (&locked_fd->list);
-
- list_add_tail (&locked_fd->list, &priv->saved_fds);
-
-out:
- return ret;
-}
-
-int
-afr_save_locked_fd (xlator_t *this, fd_t *fd)
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
{
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->mutex);
- {
- if (__is_fd_saved (this, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p already saved", fd);
- goto unlock;
- }
-
- ret = __afr_save_locked_fd (this, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "fd=%p could not be saved", fd);
- goto unlock;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->mutex);
-
- return ret;
-}
-
-static int
-afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
-
- local = frame->local;
-
- locked_fd = local->locked_fd;
-
- STACK_DESTROY (frame->root);
- afr_local_cleanup (local, this);
-
- afr_save_locked_fd (this, locked_fd->fd);
-
- return 0;
-
-}
-
-static int
-afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
-{
- afr_fd_ctx_t *fdctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t tmp = 0;
- int i = 0;
- int source_child = -1;
- int ret = 0;
-
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fdctx->locked_on[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d", i);
- source_child = i;
- break;
- }
- }
-
-out:
- return source_child;
-
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock);
-int32_t
-afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "lock recovery failed");
- goto cleanup;
- }
-
- source_child = local->source_child;
-
- memcpy (&flock, lock, sizeof (*lock));
-
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-int
-afr_recover_lock (call_frame_t *frame, xlator_t *this,
- struct gf_flock *flock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t lock_recovery_child = 0;
-
- priv = this->private;
- local = frame->local;
-
- lock_recovery_child = local->lock_recovery_child;
-
- frame->root->lk_owner = flock->l_owner;
-
- STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
- (void *) (long) lock_recovery_child,
- priv->children[lock_recovery_child],
- priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock);
-
- return 0;
-}
-
-static int
-is_afr_lock_eol (struct gf_flock *lock)
-{
- int ret = 0;
-
- if ((lock->l_type == GF_LK_EOL))
- ret = 1;
-
- return ret;
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
-{
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to get locks on fd");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Got a lock on fd");
-
- if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_INFO,
- "Reached EOL on locks on fd");
- goto cleanup;
- }
-
- afr_recover_lock (frame, this, lock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
-
- return 0;
-}
-
-static int
-afr_lock_recovery (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- priv = this->private;
- local = frame->local;
-
- fd = local->fd;
-
- source_child = afr_get_source_lock_recovery (this, fd);
- if (source_child < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not recover locks due to lock "
- "split brain");
- ret = -1;
- goto out;
- }
-
- local->source_child = source_child;
-
- /* the flock can be zero filled as we're querying incrementally
- the locks held on the fd.
- */
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock);
-
-out:
- return ret;
-}
-
-
-static int
-afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- fdctx->opened_on[child_index] = AFR_FD_OPENED;
-
-out:
- return ret;
-}
-
-int32_t
-afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- int32_t child_index = (long )cookie;
- int ret = 0;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Reopen during lock-recovery failed");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Open succeeded => proceed to recover locks");
-
- ret = afr_lock_recovery (frame, this);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Lock recovery failed");
- goto cleanup;
- }
-
- ret = afr_mark_fd_opened (this, fd, child_index);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Marking fd open failed");
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-static int
-afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- uint64_t tmp = 0;
- afr_fd_ctx_t *fdctx = NULL;
- loc_t loc = {0,};
- int32_t child_index = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (local && local->fd);
-
- ret = fd_ctx_get (local->fd, this, &tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get the context of fd",
- uuid_utoa (local->fd->inode->gfid));
- fdctx = (afr_fd_ctx_t *) (long) tmp;
- /* TODO: instead we should return from the function */
- GF_ASSERT (fdctx);
-
- child_index = local->lock_recovery_child;
-
- inode_path (local->fd->inode, NULL, (char **)&loc.path);
- loc.name = strrchr (loc.path, '/');
- loc.inode = inode_ref (local->fd->inode);
- loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
- (void *)(long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd,
- fdctx->wbflags);
-
- return 0;
-}
-
-static int
-is_fd_opened (fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, THIS, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
- ret = 1;
-
-out:
- return ret;
-}
-
-int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
-{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- int ret = 0;
- struct list_head locks_list = {0,};
-
-
- priv = this->private;
-
- if (list_empty (&priv->saved_fds))
- goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- local = GF_CALLOC (1, sizeof (*local),
- gf_afr_mt_afr_local_t);
- if (!local) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_INIT (local, priv);
- if (!local) {
- ret = -1;
- goto out;
- }
-
- frame->local = local;
-
- INIT_LIST_HEAD (&locks_list);
-
- pthread_mutex_lock (&priv->mutex);
- {
- list_splice_init (&priv->saved_fds, &locks_list);
- }
- pthread_mutex_unlock (&priv->mutex);
-
- list_for_each_entry_safe (locked_fd, tmp,
- &locks_list, list) {
-
- list_del_init (&locked_fd->list);
-
- local->fd = fd_ref (locked_fd->fd);
- local->lock_recovery_child = child_index;
- local->locked_fd = locked_fd;
-
- if (!is_fd_opened (locked_fd->fd, child_index)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting open before lock "
- "recovery");
- afr_lock_recovery_preopen (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting lock recovery "
- "without a preopen");
- afr_lock_recovery (frame, this);
- }
- }
-
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
out:
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index ebe189c35..05df90cc0 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,7 +17,6 @@
enum gf_afr_mem_types_ {
gf_afr_mt_iovec = gf_common_mt_end + 1,
gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_local_t,
gf_afr_mt_afr_private_t,
gf_afr_mt_int32_t,
gf_afr_mt_char,
@@ -44,8 +34,15 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
- gf_afr_mt_afr_crawl_data_t,
- gf_afr_mt_afr_brick_pos_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 5031cfd5e..f86aa7fd8 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -52,87 +43,31 @@
#include "afr-dir-read.h"
#include "afr-dir-write.h"
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_stale_child_up (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int up = -1;
-
- priv = this->private;
- if (!local->fresh_children)
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
- if (priv->child_count == afr_get_children_count (local->fresh_children,
- priv->child_count))
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- if (afr_is_child_present (local->fresh_children,
- priv->child_count, i))
- continue;
- up = i;
- break;
- }
-out:
- return up;
-}
-void
-afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- int st_child = -1;
- char reason[64] = {0};
-
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- if (!IA_ISREG (inode->ia_type))
- goto out;
-
- st_child = afr_stale_child_up (local, this);
- if (st_child < 0)
- goto out;
-
- sh->do_data_self_heal = _gf_true;
- sh->do_metadata_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_missing_entry_self_heal = _gf_true;
-
- snprintf (reason, sizeof (reason), "stale subvolume %d detected",
- st_child);
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-out:
- return;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
+
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = frame->local;
- afr_private_t *priv = NULL;
- priv = this->private;
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, xdata);
return 0;
}
@@ -140,53 +75,41 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_local_t * local = NULL;
- int ret = 0;
int call_count = -1;
int child_index = (long) cookie;
- afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
LOCK (&frame->lock);
{
if (op_ret == -1) {
local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags,
- local->cont.open.wbflags);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
+ if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
- fd, 0);
+ fd, 0, NULL);
} else {
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd,
+ local->xdata_rsp);
}
}
@@ -195,230 +118,190 @@ unlock:
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
int i = 0;
- int ret = -1;
int32_t call_count = 0;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
- //We can't let truncation to happen outside transaction.
+ afr_fd_ctx_t *fd_ctx = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ //We can't let truncation to happen outside transaction.
priv = this->private;
- if (flags & (O_CREAT|O_TRUNC)) {
- QUORUM_CHECK(open,out);
- }
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- gf_log (this->name, GF_LOG_WARNING,
- "failed to open as split brain seen, returning EIO");
- op_errno = EIO;
- goto out;
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- frame->local = local;
- call_count = local->call_count;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
- local->cont.open.flags = flags;
- local->cont.open.wbflags = wbflags;
+ call_count = local->call_count;
- local->fd = fd_ref (fd);
+ local->cont.open.flags = flags;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, wbflags);
-
+ loc, (flags & ~O_TRUNC), fd, xdata);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
- }
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL);
return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
- }
-}
-
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
- }
-}
-
int
afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
priv = this->private;
local = frame->local;
- call_count = afr_frame_return (frame);
-
- //Note: No frame locking needed for this block of code
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local->fd);
- goto out;
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
}
- fop_paused = local->fop_paused;
+ fd_ctx = local->fd_ctx;
+
LOCK (&local->fd->lock);
{
if (op_ret >= 0) {
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
} else {
- //Change open status from OPENING to NOT OPENED.
fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
}
UNLOCK (&local->fd->lock);
-out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
-done:
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
+
return 0;
}
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
+
+static int
+afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- ia_type_t ia_type = IA_INVAL;
- int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return 0;
+
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return count;
+}
+
+
+void
+afr_fix_open (fd_t *fd, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
- local = frame->local;
priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- ALLOC_OR_GOTO (open_local, afr_local_t, out);
- open_frame->local = open_local;
- ret = AFR_LOCAL_INIT (open_local, priv);
- if (ret < 0)
- goto out;
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
- }
- open_local->call_count = need_open_count;
+ if (!afr_is_fd_fixable (fd))
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ need_open = alloca0 (priv->child_count);
+
+ call_count = afr_fd_ctx_need_open (fd, this, need_open);
+ if (!call_count)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+
+ local->call_count = call_count;
gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
- need_open_count);
+ call_count);
- ia_type = open_local->fd->inode->ia_type;
- GF_ASSERT (ia_type != IA_INVAL);
for (i = 0; i < priv->child_count; i++) {
if (!need_open[i])
continue;
- if (IA_IFDIR == ia_type) {
+
+ if (IA_IFDIR == fd->inode->ia_type) {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for dir %s on subvolume %s",
local->loc.path, priv->children[i]->name);
@@ -427,25 +310,28 @@ afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &open_local->loc, open_local->fd);
+ &local->loc, local->fd,
+ NULL);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for file %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, fd_ctx->wbflags);
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
+ if (!--call_count)
+ break;
}
+
+ return;
out:
- if (op_errno)
- ret = -op_errno;
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 000000000..186f68c33
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,239 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+int
+afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->readable[i]) {
+ /* don't even bother trying here.
+ just mark as attempted and move on. */
+ local->read_attempted[i] = 1;
+ continue;
+ }
+
+ if (!local->read_attempted[i]) {
+ subvol = i;
+ break;
+ }
+ }
+
+ /* If no more subvols were available for reading, we leave
+ @subvol as -1, which is an indication we have run out of
+ readable subvols. */
+ if (subvol != -1)
+ local->read_attempted[subvol] = 1;
+ local->readfn (frame, this, subvol);
+
+ return 0;
+}
+
+
+int
+afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_local_t *local = NULL;
+ int read_subvol = 0;
+ int event_generation = 0;
+ inode_t *inode = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ inode = local->inode;
+
+ if (err) {
+ local->op_errno = -err;
+ local->op_ret = -1;
+ read_subvol = -1;
+ goto readfn;
+ }
+
+ ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
+
+ if (ret == -1 || !event_generation) {
+ /* Even after refresh, we don't have a good
+ read subvolume. Time to bail */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ read_subvol = -1;
+ goto readfn;
+ }
+
+ read_subvol = afr_read_subvol_select_by_policy (inode, this,
+ local->readable);
+
+ if (read_subvol == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto readfn;
+ }
+
+ if (local->read_attempted[read_subvol]) {
+ afr_read_txn_next_subvol (frame, this);
+ return 0;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+readfn:
+ local->readfn (frame, this, read_subvol);
+
+ return 0;
+}
+
+
+int
+afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local->refreshed) {
+ local->refreshed = _gf_true;
+ afr_inode_refresh (frame, this, local->inode,
+ afr_read_txn_refresh_done);
+ } else {
+ afr_read_txn_next_subvol (frame, this);
+ }
+
+ return 0;
+}
+
+
+/* afr_read_txn_wipe:
+
+ clean internal variables in @local in order to make
+ it possible to call afr_read_txn() multiple times from
+ the same frame
+*/
+
+void
+afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->readfn = NULL;
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->read_attempted[i] = 0;
+ local->readable[i] = 0;
+ }
+}
+
+
+/*
+ afr_read_txn:
+
+ This is the read transaction function. The way it works:
+
+ - Determine read-subvolume from inode ctx.
+
+ - If read-subvolume's generation was stale, refresh ctx once by
+ calling afr_inode_refresh()
+
+ Else make an attempt to read on read-subvolume.
+
+ - If attempted read on read-subvolume fails, refresh ctx once
+ by calling afr_inode_refresh()
+
+ - After ctx refresh, query read-subvolume freshly and attempt
+ read once.
+
+ - If read fails, try every other readable[] subvolume before
+ finally giving up. readable[] elements are set by afr_inode_refresh()
+ based on dirty and pending flags.
+
+ - If file is in split brain in the backend, generation will be
+ kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+ all elements. Therefore reads always fail.
+*/
+
+int
+afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = -1;
+ int event_generation = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_read_txn_wipe (frame, this);
+
+ local->readfn = readfn;
+ local->inode = inode_ref (inode);
+
+ local->transaction.type = type;
+ ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
+ &event_generation, type);
+ if (ret == -1)
+ /* very first transaction on this inode */
+ goto refresh;
+
+ if (local->event_generation != event_generation)
+ /* servers have disconnected / reconnected, and possibly
+ rebooted, very likely changing the state of freshness
+ of copies */
+ goto refresh;
+
+ read_subvol = afr_read_subvol_select_by_policy (inode, this,
+ local->readable);
+
+ if (read_subvol < 0 || read_subvol > priv->child_count) {
+ gf_log (this->name, GF_LOG_WARNING, "Unreadable subvolume %d "
+ "found with event generation %d", read_subvol,
+ event_generation);
+ goto refresh;
+ }
+
+ if (!local->child_up[read_subvol]) {
+ /* should never happen, just in case */
+ gf_log (this->name, GF_LOG_WARNING, "subvolume %d is the "
+ "read subvolume in this generation, but is not up",
+ read_subvol);
+ goto refresh;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+
+ local->readfn (frame, this, read_subvol);
+
+ return 0;
+
+refresh:
+ afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index 4dfb85824..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "md5.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno);
-static int
-sh_destroy_frame (call_frame_t *frame, xlator_t *this)
-{
- if (!frame)
- goto out;
-
- AFR_STACK_DESTROY (frame);
-out:
- return 0;
-}
-
-static void
-sh_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
-}
-
-static int
-sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *last_loop_frame)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- if (sh_priv) {
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
- }
-
- sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
- GF_ASSERT (!last_loop_frame);
- //loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_INFO,
- "self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (sh_frame, this);
- } else {
- GF_ASSERT (last_loop_frame);
- if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_INFO, "full self-heal "
- "completed on %s",local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
- }
-
- if (sh_frame == last_loop_frame)
- sh->old_loop_frame = NULL;
- else
- sh->old_loop_frame = last_loop_frame;
- local->self_heal.algo_completion_cbk (sh_frame, this);
- }
-
- return 0;
-}
-
-int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- if (!loop_frame)
- goto out;
-
- loop_local = loop_frame->local;
- if (loop_local) {
- loop_sh = &loop_local->self_heal;
- }
-
- if (loop_sh && loop_sh->loop_completion_cbk) {
- if (loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
- loop_sh->loop_completion_cbk);
- } else {
- loop_sh->loop_completion_cbk (loop_frame, this);
- }
- } else {
- //default loop_completion_cbk destroys the loop_frame
- if (loop_sh && !loop_sh->loop_completion_cbk)
- GF_ASSERT (!loop_sh->data_lock_held);
- sh_destroy_frame (loop_frame, this);
- }
-out:
- return 0;
-}
-
-static int
-sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- loop_sh->data_lock_held = _gf_true;
- loop_sh->sh_data_algo_start (loop_frame, this);
- return 0;
-}
-
-static int
-sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
-{
- call_frame_t *sh_frame = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
- sh_frame = loop_sh->sh_frame;
-
- gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- if (loop_sh->old_loop_frame != loop_sh->sh_frame)
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
- sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
- return 0;
-}
-
-static int
-sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
- call_frame_t *old_loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- afr_private_t *priv = NULL;
-
- GF_ASSERT (sh_frame);
-
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- new_loop_frame = copy_frame (sh_frame);
- if (!new_loop_frame)
- goto out;
- //We want the frame to have same lk_oner as sh_frame
- new_loop_local = afr_local_copy (local, this);
- if (!new_loop_local)
- goto out;
- new_loop_frame->local = new_loop_local;
-
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->sources = memdup (sh->sources,
- priv->child_count * sizeof (*sh->sources));
- if (!new_loop_sh->sources)
- goto out;
- new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*new_loop_sh->write_needed),
- gf_afr_mt_char);
- if (!new_loop_sh->write_needed)
- goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN,
- gf_afr_mt_uint8_t);
- if (!new_loop_sh->checksum)
- goto out;
- new_loop_sh->offset = offset;
- new_loop_sh->block_size = sh->block_size;
- new_loop_sh->inode = inode_ref (sh->inode);
- new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
- new_loop_sh->source = sh->source;
- new_loop_sh->active_sinks = sh->active_sinks;
- new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
- new_loop_sh->file_has_holes = sh->file_has_holes;
- new_loop_sh->loop_completion_cbk = sh_destroy_frame;
- new_loop_sh->old_loop_frame = old_loop_frame;
- new_loop_sh->sh_frame = sh_frame;
- afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- sh_loop_lock_success, sh_loop_lock_failure);
- return 0;
-out:
- sh->op_failed = 1;
- if (new_loop_frame) {
- new_loop_frame->local = new_loop_local;
- }
- if (old_loop_frame != sh_frame)
- sh_loop_finish (old_loop_frame, this);
- sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
- return 0;
-}
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
-
- priv = this->private;
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((!sh->eof_reached) && (0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- sh_priv->offset += block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (0 == loop) {
- //loop finish does unlock, but the erasing of the pending
- //xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !sh->op_failed)
- goto driver_done;
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- }
-
- //If we have more loops to form we should finish previous loop after
- //the next loop lock
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- sh_loop_driver (sh_frame, this, _gf_false, NULL);
- } else {
- gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
- "for offset %"PRId64, offset);
-
- sh_loop_start (sh_frame, this, offset, old_loop_frame);
- old_loop_frame = NULL;
- offset += block_size;
- }
- }
-
-driver_done:
- if (is_driver_done) {
- sh_loop_driver_done (sh_frame, this, old_loop_frame);
- }
- return 0;
-}
-
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (loop_frame) {
- GF_ASSERT (loop_frame != sh_frame);
- loop_local = loop_frame->local;
- if (loop_local)
- loop_sh = &loop_local->self_heal;
- if (loop_sh)
- gf_log (this->name, GF_LOG_TRACE, "loop for offset "
- "%"PRId64" returned", loop_sh->offset);
- }
-
- if (op_ret == -1) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- if (loop_frame) {
- sh_loop_finish (loop_frame, this);
- loop_frame = NULL;
- }
- }
-
- sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
-
- return 0;
-}
-
-static int
-sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index, loop_sh->offset);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- afr_sh_set_error (loop_sh, op_errno);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame,
- loop_sh->op_ret, loop_sh->op_errno);
- }
-
- return 0;
-}
-
-
-static int
-sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- int i = 0;
- int call_count = 0;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, loop_local->loc.path, loop_sh->offset);
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- sh->op_failed = 1;
- gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
- "for %s reason :%s", sh->source,
- sh_local->loc.path, strerror (errno));
- } else {
- sh->eof_reached = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
- sh_local->loc.path);
- }
- sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
- goto out;
- }
-
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) {
- gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- goto out;
- }
-
- call_count = sh_number_of_writes_needed (loop_sh->write_needed,
- priv->child_count);
- GF_ASSERT (call_count > 0);
- loop_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!loop_sh->write_needed[i])
- continue;
- STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- loop_sh->healing_fd, vector, count,
- loop_sh->offset, iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->readv,
- loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = (long) cookie;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum, MD5_DIGEST_LEN);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LEN),
- MD5_DIGEST_LEN)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_sh->offset);
-
- write_needed = loop_sh->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed && !sh->op_failed) {
- sh_loop_read (loop_frame, this);
- } else {
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- }
- }
-
- return 0;
-}
-
-static int
-sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- call_count = loop_sh->active_sinks + 1; /* sinks and source */
-
- loop_local->call_count = call_count;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-static int
-sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
- loop_sh->write_needed[i] = 1;
- }
- sh_loop_read (loop_frame, this);
- return 0;
-}
-
-static int
-sh_do_nothing (call_frame_t *frame, xlator_t *this)
-{
- return 0;
-}
-
-int
-afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
- afr_sh_algo_fn sh_data_algo_start)
-{
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv) {
- sh->op_failed = 1;
- sh_loop_driver_done (sh_frame, this, NULL);
- goto out;
- }
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
- sh->sh_data_algo_start = sh_data_algo_start;
-
- sh_local->call_count = 0;
-
- sh->loop_completion_cbk = sh_do_nothing;
- sh_loop_driver (sh_frame, this, _gf_true, sh_frame);
-out:
- return 0;
-}
-
-int
-afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
- return 0;
-}
-
-int
-afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
- return 0;
-}
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 04d8e8a6c..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-} afr_sh_algo_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5ea7bcc2f..4dac83113 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,2313 +1,1009 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-#include "pump.h"
-
-//Intersection[child]=1 if child is part of intersection
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count)
-{
- int i = 0;
-
- memset (intersection, 0, sizeof (*intersection) * child_count);
- for (i = 0; i < child_count; i++) {
- intersection[i] = afr_is_child_present (set1, child_count, i)
- && afr_is_child_present (set2, child_count,
- i);
- }
-}
+#include "byte-order.h"
-/**
- * select_source - select a source and return it
- */
int
-afr_sh_select_source (int sources[], int child_count)
+afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- int i = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ afr_local_t *local = NULL;
- return -1;
-}
+ local = frame->local;
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
- sh->success[i] = 1;
- }
- }
- sh->active_sinks = active_sinks;
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
}
-/**
- * sink_count - return number of sinks in sources array
- */
int
-afr_sh_sink_count (int sources[], int child_count)
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr)
{
- int i = 0;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {0, };
-int
-afr_sh_source_count (int sources[], int child_count)
-{
- int i = 0;
- int nsource = 0;
+ priv = this->private;
+ local = frame->local;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
-{
- sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
-}
+ STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
-{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
-
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
-
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG,
- "pending_matrix: %s", buf);
- }
-
- GF_FREE (buf);
+ syncbarrier_wait (&local->barrier, 1);
+
+ return 0;
}
-void
-afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+
+dict_t *
+afr_selfheal_output_xattr (xlator_t *this, afr_transaction_type type,
+ int *output_dirty, int **output_matrix, int subvol)
{
- int i = 0;
- int j = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ int j = 0;
+ int idx = 0;
+ int ret = 0;
+ int *raw = 0;
- GF_ASSERT (pending_matrix);
+ priv = this->private;
+ idx = afr_index_for_transaction_type (type);
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
-}
+ xattr = dict_new ();
+ if (!xattr)
+ return NULL;
-void
-afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- size_t child_count)
-{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
- GF_ASSERT (ignorant_subvols);
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
+ if (output_dirty[subvol]) {
+ /* clear dirty */
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32 (output_dirty[subvol]);
+ ret = dict_set_bin (xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto err;
+ }
+
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ if (!output_matrix[subvol][j])
+ continue;
+
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
+ gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32 (output_matrix[subvol][j]);
+
+ ret = dict_set_bin (xattr, priv->pending_key[j],
+ raw, sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto err;
+ }
+
+ return xattr;
+err:
+ if (xattr)
+ dict_unref (xattr);
+ return NULL;
}
+
int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
- unsigned char *ignorant_subvols = NULL;
-
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
-
- afr_init_pending_matrix (pending_matrix, child_count);
-
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- ignorant_subvols[i] = 1;
- continue;
- }
-
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
-
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- child_count);
- GF_FREE (ignorant_subvols);
-out:
- return ret;
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ pending = alloca0 (priv->child_count);
+
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j])
+ output_matrix[i][j] = 1;
+ else
+ output_matrix[i][j] = -input_matrix[i][j];
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+
+ xattr = afr_selfheal_output_xattr (this, type, output_dirty,
+ output_matrix, i);
+ if (!xattr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to allocate xdata for subvol %d", i);
+ continue;
+ }
+
+ afr_selfheal_post_op (frame, this, inode, i, xattr);
+
+ dict_unref (xattr);
+ }
+
+ return 0;
}
-typedef enum {
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE,
- AFR_NODE_INVALID = -1,
-} afr_node_type;
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+void
+afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count)
+{
+ int i = 0;
+ dict_t *xdata = NULL;
+
+ if (dst == src)
+ return;
+
+ for (i = 0; i < count; i++) {
+ dst[i].valid = src[i].valid;
+ dst[i].op_ret = src[i].op_ret;
+ dst[i].op_errno = src[i].op_errno;
+ dst[i].prestat = src[i].prestat;
+ dst[i].poststat = src[i].poststat;
+ dst[i].preparent = src[i].preparent;
+ dst[i].postparent = src[i].postparent;
+ dst[i].preparent2 = src[i].preparent2;
+ dst[i].postparent2 = src[i].postparent2;
+ if (src[i].xdata)
+ xdata = dict_ref (src[i].xdata);
+ else
+ xdata = NULL;
+ if (dst[i].xdata)
+ dict_unref (dst[i].xdata);
+ dst[i].xdata = xdata;
+ memcpy (dst[i].checksum, src[i].checksum,
+ MD5_DIGEST_LENGTH);
+ }
+}
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
+int
+afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol,
+ int idx, dict_t *xdata)
{
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
- }
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw))
+ return -1;
- return ret;
-}
+ if (!pending_raw)
+ return -1;
+ memcpy (pending, pending_raw, sizeof(pending));
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
-{
- return array[i]; /* fool if accuses itself */
+ dirty[subvol] = ntoh32 (pending[idx]);
+
+ return 0;
}
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
+int
+afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol,
+ int idx, dict_t *xdata)
{
- return !array[i]; /* wise if does not accuse itself */
-}
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
+ afr_private_t *priv = NULL;
+ priv = this->private;
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw))
+ continue;
+
+ if (!pending_raw)
+ continue;
+
+ memcpy (pending, pending_raw, sizeof(pending));
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
+ matrix[subvol][i] = ntoh32 (pending[idx]);
+ }
- return ret;
+ return 0;
}
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- int i = 0;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
+
+ idx = afr_index_for_transaction_type (type);
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].xdata)
+ continue;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
- }
- }
+ xdata = replies[i].xdata;
- return ret;
+ afr_selfheal_fill_dirty (this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
+ }
+
+ return 0;
}
+
/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
+ *
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
+ *
+ * The output of the function is by filling the arrays sources[] and sinks[].
+ *
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
+ *
+ * sinks[i] is set if i'th server needs to be healed.
+ *
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
*
- * If no nodes with wisdom 1 exist, a split-brain has occurred.
*/
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
-
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
-
- characters[i].wisdom = 0;
- }
- }
- }
- }
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL;
+ int **matrix = NULL;
+ char *accused = NULL;
+
+ priv = this->private;
+
+ dirty = alloca0 (priv->child_count * sizeof (int));
+ accused = alloca0 (priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
+
+ /* Next short list all accused to exclude them from being sources */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ accused[j] = 1;
+ }
+ }
+
+ /* Short list all non-accused as sources */
+ memset (sources, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ }
+
+ /* Everyone accused by sources are sinks */
+ memset (sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
+ }
+ }
+
+ /* If any source has 'dirty' bit, pick first
+ 'dirty' source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && dirty[i]) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (j != i) {
+ sources[j] = 0;
+ sinks[j] = 1;
+ }
+ }
+ break;
+ }
+ }
+
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT (sources, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ sinks[i] = 1;
+ }
+ }
+
+ return 0;
}
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
+int
+afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
{
- int i = 0;
- int ret = 1;
+ afr_local_t *local = NULL;
+ int i = -1;
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
+ local = frame->local;
+ i = (long) cookie;
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
- }
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata)
+ local->replies[i].xdata = dict_ref (xdata);
- return ret;
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
}
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on)
{
- int nsources = 0;
- int i = 0;
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
- sources[i] = 1;
- nsources++;
- }
- }
+ local = frame->local;
+ priv = frame->this->private;
- return nsources;
-}
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ return NULL;
-static void
-afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int j = 0;
- int witness = 0;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (pending_matrix);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- witness = 0;
- for (j = 0; j < child_count; j++) {
- if (i == j)
- continue;
- witness += pending_matrix[i][j];
- }
- witnesses[i] = witness;
- }
-}
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
+ dict_destroy (xattr_req);
+ return NULL;
+ }
-static int32_t
-afr_find_biggest_witness_among_fools (int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int biggest_witness = -1;
+ inode = inode_new (parent->table);
+ if (!inode) {
+ dict_destroy (xattr_req);
+ return NULL;
+ }
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+ AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- if (biggest_witness < witnesses[i])
- biggest_witness = witnesses[i];
- }
- return biggest_witness;
-}
+ afr_replies_copy (replies, local->replies, priv->child_count);
-int
-afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count, int32_t witness)
-{
- int i = 0;
- int nsources = 0;
-
- GF_ASSERT (sources);
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- if (witness == witnesses[i]) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
-}
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
-static int
-afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
- afr_node_character *characters,
- int child_count)
-{
- int32_t biggest_witness = 0;
- int nsources = 0;
- int32_t *witnesses = NULL;
-
- GF_ASSERT (child_count > 0);
-
- witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
- gf_afr_mt_int32_t);
- if (NULL == witnesses) {
- nsources = -1;
- goto out;
- }
-
- afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
- child_count);
- biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
- characters,
- child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
-out:
- if (witnesses)
- GF_FREE (witnesses);
- return nsources;
+ return inode;
}
-int
-afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *valid_children, int child_count,
- uint32_t uid)
-{
- int i = 0;
- int nsources = 0;
- int child = 0;
-
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (sources);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
-
- child = valid_children[i];
- if (uid == bufs[child].ia_uid) {
- sources[child] = 1;
- nsources++;
- }
- }
- return nsources;
-}
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
- int child_count)
+afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on)
{
- int i = 0;
- int smallest = -1;
- int child = 0;
-
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
- child = valid_children[i];
- if ((smallest == -1) ||
- (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
- smallest = child;
- }
- }
- return smallest;
-}
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
-static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int smallest = 0;
-
- smallest = afr_get_child_with_lowest_uid (bufs, valid_children,
- child_count);
- if (smallest < 0) {
- nsources = -1;
- goto out;
- }
- nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- valid_children, child_count,
- bufs[smallest].ia_uid);
-out:
- return nsources;
-}
+ local = frame->local;
+ priv = frame->this->private;
-char *
-afr_get_character_str (afr_node_type type)
-{
- char *character = NULL;
-
- switch (type) {
- case AFR_NODE_INNOCENT:
- character = "innocent";
- break;
- case AFR_NODE_FOOL:
- character = "fool";
- break;
- case AFR_NODE_WISE:
- character = "wise";
- break;
- default:
- character = "invalid";
- break;
- }
- return character;
-}
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ return -ENOMEM;
-afr_node_type
-afr_find_child_character_type (int32_t *pending_row, int32_t child,
- int32_t child_count, const char *xlator_name)
-{
- afr_node_type type = AFR_NODE_INVALID;
-
- GF_ASSERT (pending_row);
- GF_ASSERT (child_count > 0);
- GF_ASSERT ((child >= 0) && (child < child_count));
-
- if (afr_sh_is_innocent (pending_row, child_count))
- type = AFR_NODE_INNOCENT;
- else if (afr_sh_is_fool (pending_row, child, child_count))
- type = AFR_NODE_FOOL;
- else if (afr_sh_is_wise (pending_row, child, child_count))
- type = AFR_NODE_WISE;
- else
- GF_ASSERT (0);
-
- gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s",
- child, afr_get_character_str (type));
- return type;
-}
-
-int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
- int nsources = -1;
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) {
+ dict_destroy (xattr_req);
+ return -ENOMEM;
+ }
- priv = this->private;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, gfid);
- if (afr_get_children_count (success_children, priv->child_count) == 0)
- goto out;
+ AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, type, priv->child_count);
+ afr_replies_copy (replies, local->replies, priv->child_count);
- sh_type = afr_self_heal_type_for_transaction (type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
- afr_sh_print_pending_matrix (pending_matrix, this);
-
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name);
-out:
- return nsources;
+ return 0;
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name)
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies)
{
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
-
- afr_node_character *characters = NULL;
- int i = 0;
- int nsources = -1;
- xlator_t *this = NULL;
-
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count, gf_afr_mt_afr_node_character);
- if (!characters)
- goto out;
-
- this = THIS;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
- nsources = 0;
- for (i = 0; i < child_count; i++) {
- characters[i].type =
- afr_find_child_character_type (pending_matrix[i], i,
- child_count,
- xlator_name);
- if (AFR_NODE_INVALID == characters[i].type)
- gf_log (xlator_name, GF_LOG_WARNING,
- "child %d had invalid xattrs", i);
- }
-
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- valid_children,
- child_count,
- sources);
- goto out;
- }
-
- if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
-
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
- gf_log (this->name, GF_LOG_INFO,
- "split-brain possible, no source detected");
- nsources = -1;
-
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
- }
- } else {
- nsources = afr_mark_biggest_of_fools_as_source (sources,
- pending_matrix,
- characters,
- child_count);
- }
+ afr_private_t *priv = NULL;
-out:
- if (nsources == 0) {
- for (i = 0; i < child_count; i++) {
- if (valid_children[i] != -1)
- sources[valid_children[i]] = 1;
- }
- }
- if (characters)
- GF_FREE (characters);
-
- gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
- return nsources;
-}
+ priv = frame->this->private;
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
- int k = 0;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
-
- for (i = 0; i < child_count; i++) {
- if (pending_raw)
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Unable to get dict value.");
- if (!success[j])
- continue;
-
- k = afr_index_for_transaction_type (type);
-
- if (pending_raw != NULL) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
- }
-
- }
- }
+ return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies,
+ priv->child_up);
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
-
- if (!pending)
- continue;
- /* 3 = data+metadata+entry */
-
- k = afr_index_for_transaction_type (type);
-
- pending[k] = hton32 (delta_matrix[i][j]);
-
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- }
- }
- return 0;
-}
+ afr_local_t *local = NULL;
+ int i = 0;
+ local = frame->local;
+ i = (long) cookie;
-int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- priv = this->private;
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
- if (ret != 0)
- return 0;
+int
+afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ local = frame->local;
+ priv = this->private;
- if (pending[j])
- return 1;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
+ }
+ }
- return 0;
+ return count;
}
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
- priv = this->private;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- if (ret != 0)
- return 0;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ loc_wipe (&loc);
- if (pending[j])
- return 1;
- }
-
- return 0;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- if (ret != 0)
- return 0;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
- if (pending[j])
- return 1;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on);
- return 0;
-}
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL);
+ break;
+ }
+ }
+ loc_wipe (&loc);
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sh->locked_nodes[i] = 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda || sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "split brain found, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
-
- return 0;
-}
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
-static int
-afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- local = frame->local;
- int_lock = &local->internal_lock;
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL);
- return 0;
-}
+ loc_wipe (&loc);
-int
-afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
-{
- int ret = -ENOMEM;
- sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
- gf_afr_mt_iatt);
- if (!sh->buf)
- goto out;
- sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
- gf_afr_mt_iatt);
- if (!sh->parentbufs)
- goto out;
- sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
- gf_afr_mt_int);
- if (!sh->child_errno)
- goto out;
- sh->success_children = afr_children_create (child_count);
- if (!sh->success_children)
- goto out;
- sh->fresh_children = afr_children_create (child_count);
- if (!sh->fresh_children)
- goto out;
- sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
- gf_afr_mt_dict_t);
- if (!sh->xattr)
- goto out;
- ret = 0;
-out:
- return ret;
+ return 0;
}
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- sh->buf[child_index] = *buf;
- sh->parentbufs[child_index] = *postparent;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_ERROR, "path %s on subvolume"
- " %s => -1 (%s)", loc->path,
- priv->children[child_index]->name,
- strerror (op_errno));
- local->self_heal.child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
- return;
-}
-
-gf_boolean_t
-afr_valid_ia_type (ia_type_t ia_type)
-{
- switch (ia_type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- return _gf_true;
- default:
- return _gf_false;
- }
- return _gf_false;
-}
int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame)
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- call_frame_t *new_frame = NULL;
-
- op_errno = ENOMEM;
- priv = this->private;
- new_frame = copy_frame (frame);
- if (!new_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- local = frame->local;
- new_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_source;
- impunge_sh->impunge_ret_child = ret_child;
- impunge_sh->impunging_entry_mode = entry_mode;
- impunge_local->child_up = memdup (local->child_up,
- sizeof (*local->child_up) *
- priv->child_count);
- if (!impunge_local->child_up)
- goto out;
-
- ret = afr_sh_common_create (impunge_sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- op_errno = 0;
- *impunge_frame = new_frame;
-out:
- if (op_errno && new_frame)
- AFR_STACK_DESTROY (new_frame);
- return -op_errno;
-}
+ loc_t loc = {0,};
-void
-afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
-{
- call_frame_t *impunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = 0;
- mode_t mode = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
- ret = afr_impunge_frame_create (frame, this, sh->source, child_index,
- mode, &impunge_frame);
- if (ret)
- goto out;
- impunge_local = impunge_frame->local;
- loc_copy (&impunge_local->loc, &local->loc);
- sh->impunge_done = impunge_done;
- impunge_local->call_count = 1;
- afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
- postparent);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s",
- local->loc.path, strerror (-ret));
- impunge_done (frame, this, child_index, -1, -ret);
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "create entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int i = 0;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- afr_sh_missing_entries_finish (frame, this);
- return 0;
- }
-
- buf = &sh->buf[sh->source];
- postparent = &sh->parentbufs[sh->source];
-
- type = buf->ia_type;
- if (!afr_valid_ia_type (type)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
- afr_sh_missing_entries_finish (frame, this);
- goto out;
- }
-
- local->call_count = enoent_count;
- for (i = 0; i < priv->child_count; i++) {
- //If !child_up errno will be zero
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_call_entry_impunge_recreate (frame, this, i,
- buf, postparent,
- afr_sh_create_entry_cbk);
- enoent_count--;
- }
- GF_ASSERT (enoent_count == 0);
-out:
- return 0;
-}
+ loc_wipe (&loc);
-void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t ia_type = IA_INVAL;
- int32_t nsources = 0;
- loc_t *loc = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- loc = &local->loc;
-
- if (op_ret < 0) {
- if (op_errno == EIO)
- local->govinda_gOvinda = 1;
- // EIO can happen if finding the fresh parent dir failed
- goto out;
- }
-
- //now No chance for the ia_type to conflict
- ia_type = sh->buf[sh->success_children[0]].ia_type;
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- afr_transaction_type_get (ia_type));
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- op_errno = EIO;
- goto out;
- }
-
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- sh->source = sh->fresh_children[0];
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
-
- if (sh->gfid_sh_success_cbk)
- sh->gfid_sh_success_cbk (frame, this);
- sh->type = sh->buf[sh->source].ia_type;
- if (uuid_is_null (loc->inode->gfid))
- uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
- sh_missing_entries_create (frame, this);
- return;
-out:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
-static int
-afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent, &sh->lookup_loc);
- call_count = afr_frame_return (frame);
-
- if (call_count)
- goto out;
- op_ret = -1;
- if (!sh->success_count) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
- "reason %s", sh->lookup_loc.path,
- strerror (op_errno));
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
- (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count,
- sh->lookup_loc.path, this->name))) {
- op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
- (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- sh->lookup_loc.path))) {
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
- op_ret = 0;
-
-done:
- sh->lookup_done (frame, this, op_ret, op_errno);
-out:
- return 0;
-}
int
-afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->post_remove_call);
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "purge entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- LOCK (&frame->lock);
- {
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- }
- UNLOCK (&frame->lock);
- }
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- sh->post_remove_call (frame, this);
- return 0;
-}
+ loc_t loc = {0,};
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
-void
-afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- afr_expunge_done_cbk_t expunge_done)
-{
- call_frame_t *expunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int32_t op_errno = 0;
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- local = frame->local;
- sh = &local->self_heal;
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- loc_copy (&expunge_local->loc, &local->loc);
- sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- expunge_done (frame, this, child_index, -1, op_errno);
-}
+ priv = this->private;
+ local = frame->local;
-void
-afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
- int32_t *fresh_children,
- unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (afr_is_child_present (success_children, child_count, i) &&
- !afr_is_child_present (fresh_children, child_count, i)) {
- sh->child_errno[i] = ENOENT;
- GF_ASSERT (sh->xattr[i]);
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->op_failed) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- if (afr_gfid_missing_count (this->name, sh->fresh_children,
- sh->buf, priv->child_count,
- local->loc.path)) {
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req,
- AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- } else {
- //No need to set gfid so goto missing entries lookup done
- //Behave as if you have done the lookup
- afr_sh_remove_stale_lookup_info (sh,
- sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_children_copy (sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
- }
- }
- return 0;
-}
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-gf_boolean_t
-afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
-{
- afr_self_heal_t *sh = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_selfheal_locked_fill (frame, this, locked_on);
+ afr_selfheal_unentrylk (frame, this, inode, dom, name,
+ locked_on);
- sh = &local->self_heal;
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
+ }
+ }
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ loc_wipe (&loc);
- return _gf_false;
+ return afr_selfheal_locked_fill (frame, this, locked_on);
}
-gf_boolean_t
-afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
+
+int
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {0,};
- sh = &local->self_heal;
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_children, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk,
+ dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
- return _gf_false;
-}
+ loc_wipe (&loc);
-void
-afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
- gf_boolean_t purge_condition (afr_local_t *local,
- afr_private_t *priv,
- int child))
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (purge_condition (local, priv, i))
- call_count++;
- }
-
- if (call_count == 0) {
- sh->post_remove_call (frame, this);
- goto out;
- }
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!purge_condition (local, priv, i))
- continue;
- gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
- "on %d", local->loc.path, i);
- afr_sh_call_entry_expunge_remove (frame, this,
- (long) i, &sh->buf[i],
- afr_sh_remove_entry_cbk);
- }
-out:
- return;
+ return 0;
}
-void
-afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- sh->post_remove_call = afr_sh_missing_entries_finish;
- afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
-}
-
-void
-afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ idx = afr_index_for_transaction_type (type);
- sh->post_remove_call = afr_sh_purge_stale_entries_done;
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
- for (i = 0; i < priv->child_count; i++) {
- if (afr_is_child_present (sh->fresh_children,
- priv->child_count, i))
- continue;
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
+ }
+ }
- if ((!local->child_up[i]) || sh->child_errno[i] != 0)
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
- GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
- uuid_is_null (sh->buf[i].ia_gfid));
+ if (ntoh32 (pending_int[idx]))
+ return _gf_true;
+ }
- if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
- (uuid_compare (sh->buf[i].ia_gfid,
- sh->entrybuf.ia_gfid)))
- continue;
-
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
-
- }
- afr_sh_purge_entry_common (frame, this,
- afr_sh_purge_stale_entry_condition);
+ return _gf_false;
}
-void
-afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
- struct iatt *save,
- unsigned int child_count)
-{
- int i = 0;
- int child = 0;
- gf_boolean_t saved = _gf_false;
-
- GF_ASSERT (save);
- //if iatt buf with gfid exists sets it
- for (i = 0; i < child_count; i++) {
- child = children[i];
- if (child == -1)
- break;
- *save = bufs[child];
- saved = _gf_true;
- if (!uuid_is_null (save->ia_gfid))
- break;
- }
- GF_ASSERT (saved);
-}
-void
-afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
- unsigned int child_count)
+gf_boolean_t
+afr_is_data_set (xlator_t *this, dict_t *xdata)
{
- afr_children_intersection_get (sh->success_children,
- sh->fresh_parent_dirs,
- sh->sources, child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, child_count);
- memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
+ return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION);
}
-void
-afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+gf_boolean_t
+afr_is_metadata_set (xlator_t *this, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t fresh_child_enoents = 0;
- int32_t fresh_parent_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto fail;
- afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
- fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
- priv->child_count);
- //we need the enoent count of the subvols present in fresh_parent_dirs
- fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
- sh->child_errno,
- priv->child_count, ENOENT);
- if (fresh_child_enoents == fresh_parent_count) {
- gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
- local->loc.path);
- afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
- afr_sh_purge_entry (frame, this);
- } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
- priv->child_count, local->loc.path,
- this->name)) {
- afr_sh_save_child_iatts_from_policy (sh->fresh_children,
- sh->buf, &sh->entrybuf,
- priv->child_count);
- afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
- sh->fresh_children,
- priv->child_count);
- afr_sh_purge_stale_entry (frame, this);
- } else {
- op_errno = EIO;
- local->govinda_gOvinda = 1;
- goto fail;
- }
-
- return;
-
-fail:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION);
}
-static void
-afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+gf_boolean_t
+afr_is_entry_set (xlator_t *this, dict_t *xdata)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int enoent_count = 0;
- int nsources = 0;
- int source = -1;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- /* If We can't find a fresh parent directory here,
- * we wont know which subvol is correct without finding a parent dir
- * upwards which has correct xattrs, for that we may have to
- * do lookups till root, we dont wanna do that,
- * instead make sure that if there are conflicting gfid
- * parent dirs, self-heal thus lookup is failed with EIO.
- * if there are missing entries we dont know whether to delete or
- * create so fail with EIO,
- * If there are conflicting xattr fail with EIO.
- */
- if (op_ret < 0)
- goto out;
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count > 0) {
- gf_log (this->name, GF_LOG_ERROR, "Parent dir missing for %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
- goto out;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
- goto out;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
- if (source == -1) {
- GF_ASSERT (0);
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- goto out;
- }
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_parent_dirs, priv->child_count);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_done, NULL, 0);
- return;
-
-out:
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION);
}
+
void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+afr_inode_link (inode_t *inode, struct iatt *iatt)
{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
- memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
- sh->child_errno[i] = 0;
- }
- memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
- sh->success_count = 0;
- afr_reset_children (sh->success_children, child_count);
- afr_reset_children (sh->fresh_children, child_count);
- afr_reset_xattr (sh->xattr, child_count);
- loc_wipe (&sh->lookup_loc);
-}
+ inode_t *linked_inode = NULL;
-/* afr self-heal state will be lost if this call is made
- * please check the afr_sh_common_reset that is called in this function
- */
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
- int32_t flags)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- afr_xattr_req_prepare (this, xattr_req, loc->path);
- if (gfid) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s with gfid: %s",
- loc->path, uuid_utoa (gfid));
- GF_ASSERT (!uuid_is_null (gfid));
- afr_set_dict_gfid (xattr_req, gfid);
- }
- }
-
- afr_sh_common_reset (sh, priv->child_count);
- sh->lookup_done = lookup_done;
- loc_copy (&sh->lookup_loc, loc);
- sh->lookup_flags = flags;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s on subvolume %s",
- loc->path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame,
- afr_sh_common_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, xattr_req);
-
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
+ linked_inode = inode_link (inode, NULL, NULL, iatt);
+
+ uuid_copy (inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
}
+/*
+ * This function inspects the looked up replies (in an unlocked manner)
+ * and decides whether a locked verification and possible healing is
+ * required or not. It updates the three booleans for each type
+ * of healing. If the boolean flag gets set to FALSE, then we are sure
+ * no healing is required. If the boolean flag gets set to TRUE then
+ * we have to proceed with locked reinspection.
+ */
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- sh->op_failed = -1;
- afr_sh_missing_entries_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &sh->parent_loc,
- afr_sh_find_fresh_parents,
- NULL, AFR_LOOKUP_FAIL_CONFLICTS);
- }
-
- return 0;
-}
+afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, uuid_t gfid,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {0, };
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (afr_is_data_set (this, replies[i].xdata))
+ *data_selfheal = _gf_true;
+
+ if (afr_is_metadata_set (this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
-int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req, AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
+ if (afr_is_entry_set (this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ valid_cnt ++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ continue;
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ if (!IA_EQUAL (first, replies[i].poststat, type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_type,
+ (int) replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
+ return -EIO;
+ }
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+ if (!IA_EQUAL (first, replies[i].poststat, uid)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "UID mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_uid,
+ (int) replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- afr_set_lock_number (frame, this);
+ *metadata_selfheal = _gf_true;
+ }
- int_lock->lk_basename = base_name;
- int_lock->lk_loc = loc;
- int_lock->lock_cbk = lock_cbk;
+ if (!IA_EQUAL (first, replies[i].poststat, gid)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "GID mismatch %d vs %d on %s for gfid:%s",
+ (int) first.ia_uid,
+ (int) replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- afr_nonblocking_entrylk (frame, this);
+ *metadata_selfheal = _gf_true;
+ }
- return 0;
-}
+ if (!IA_EQUAL (first, replies[i].poststat, prot)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "MODE mismatch %d vs %d on %s for gfid:%s",
+ (int) st_mode_from_ia (first.ia_prot, 0),
+ (int) st_mode_from_ia (replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
-static int
-afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ *metadata_selfheal = _gf_true;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL (first, replies[i].poststat, size)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "SIZE mismatch %lld vs %lld on %s for gfid:%s",
+ (long long) first.ia_size,
+ (long long) replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa (replies[i].poststat.ia_gfid));
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ *data_selfheal = _gf_true;
+ }
+ }
- GF_ASSERT (local->loc.parent);
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ if (valid_cnt > 0)
+ afr_inode_link (inode, &first);
- afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
- lock_cbk);
- return 0;
-}
+ if (valid_cnt < 2)
+ return -ENOTCONN;
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
- return 0;
+ return 0;
}
-static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
- return 0;
-}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
-
- priv = this->private;
-
- sh = &l->self_heal;
-
- lc = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
- if (!lc)
- goto out;
-
- shc = &lc->self_heal;
-
- shc->unwind = sh->unwind;
- shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
- shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
- shc->do_gfid_self_heal = sh->do_gfid_self_heal;
- shc->do_data_self_heal = sh->do_data_self_heal;
- shc->do_metadata_self_heal = sh->do_metadata_self_heal;
- shc->do_entry_self_heal = sh->do_entry_self_heal;
- shc->forced_merge = sh->forced_merge;
- shc->background = sh->background;
- shc->type = sh->type;
-
- uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
-
- lc->child_up = memdup (l->child_up,
- sizeof (*lc->child_up) * priv->child_count);
- if (l->xattr_req)
- lc->xattr_req = dict_ref (l->xattr_req);
-
- if (l->cont.lookup.inode)
- lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
- if (l->cont.lookup.xattr)
- lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
-
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
-out:
- return lc;
-}
+ table = this->itable;
+ if (!table)
+ return NULL;
-int
-afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
-
- priv = this->private;
- local = bgsh_frame->local;
- sh = &local->self_heal;
-
- if (local->govinda_gOvinda)
- split_brain = _gf_true;
-
- afr_set_split_brain (this, sh->inode, split_brain);
-
- afr_self_heal_type_str_get (sh, sh_type_str,
- sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_ERROR, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
- }
-
- FRAME_SU_UNDO (bgsh_frame, afr_local_t);
-
- if (!sh->unwound && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
- }
-
- if (sh->background) {
- LOCK (&priv->lock);
- {
- priv->background_self_heals_started--;
- }
- UNLOCK (&priv->lock);
- }
-
- AFR_STACK_DESTROY (bgsh_frame);
-
- return 0;
-}
+ inode = inode_find (table, gfid);
+ if (inode)
+ return inode;
-int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int ret = 0;
- afr_self_heal_t *orig_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- loc_t *loc = NULL;
-
- local = frame->local;
- orig_sh = &local->self_heal;
- priv = this->private;
-
- GF_ASSERT (local->loc.path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->self_heal.do_metadata_self_heal,
- local->self_heal.do_data_self_heal,
- local->self_heal.do_entry_self_heal);
-
- op_errno = ENOMEM;
- sh_frame = copy_frame (frame);
- if (!sh_frame)
- goto out;
- afr_set_lk_owner (sh_frame, this);
- afr_set_low_priority (sh_frame);
-
- sh_local = afr_local_copy (local, this);
- if (!sh_local)
- goto out;
- sh_frame->local = sh_local;
- sh = &sh_local->self_heal;
-
- sh->inode = inode_ref (inode);
-
- sh->orig_frame = frame;
-
- sh->completion_cbk = afr_self_heal_completion_cbk;
-
- sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
- gf_afr_mt_char);
- if (!sh->success)
- goto out;
- sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
- gf_afr_mt_int);
- if (!sh->sources)
- goto out;
- sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
- priv->child_count,
- gf_afr_mt_int);
- if (!sh->locked_nodes)
- goto out;
-
- sh->pending_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->pending_matrix)
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->pending_matrix[i])
- goto out;
- }
-
- sh->delta_matrix = GF_CALLOC (sizeof (int32_t *), priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->delta_matrix)
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = GF_CALLOC (sizeof (int32_t),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!sh->delta_matrix)
- goto out;
- }
- sh->fresh_parent_dirs = afr_children_create (priv->child_count);
- if (!sh->fresh_parent_dirs)
- goto out;
- ret = afr_sh_common_create (sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
-
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- < priv->background_self_heal_count) {
- priv->background_self_heals_started++;
-
-
- } else {
- local->self_heal.background = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- }
-
- FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->do_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
- } else {
- loc = &sh_local->loc;
- if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
- if (!uuid_is_null (inode->gfid))
- GF_ASSERT (!uuid_compare (inode->gfid,
- sh->sh_gfid_req));
- uuid_copy (loc->gfid, sh->sh_gfid_req);
- }
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
-
- afr_sh_missing_entries_done (sh_frame, this);
- }
- op_errno = 0;
+ inode = inode_new (table);
+ if (!inode)
+ return NULL;
-out:
- if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno);
- }
- return 0;
+ uuid_copy (inode->gfid, gfid);
+
+ return inode;
}
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size)
+
+call_frame_t *
+afr_frame_create (xlator_t *this)
{
- GF_ASSERT (str && (size > strlen (" missing-entry gfid "
- "meta-data data entry")));
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ pid_t pid = -1;
- if (self_heal_p->do_metadata_self_heal) {
- snprintf (str, size, " meta-data");
- }
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return NULL;
- if (self_heal_p->do_data_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " data");
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
- if (self_heal_p->do_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " entry");
- }
+ syncopctx_setfspid (&pid);
- if (self_heal_p->do_missing_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str),
- " missing-entry");
- }
+ frame->root->pid = pid;
- if (self_heal_p->do_gfid_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " gfid");
- }
-}
+ afr_set_lk_owner (frame, this, frame->root);
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type)
-{
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_DATA;
- break;
- case AFR_METADATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_METADATA;
- break;
- case AFR_ENTRY_TRANSACTION:
- sh_type = AFR_SELF_HEAL_ENTRY;
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- GF_ASSERT (0);
- break;
- }
- return sh_type;
+ return frame;
}
+
+/*
+ * This is the entry point for healing a given GFID
+ */
+
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name, uuid_t gfid)
+afr_selfheal (xlator_t *this, uuid_t gfid)
{
- int ret = -1;
-
- if (!child) {
- goto out;
- }
+ inode_t *inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
+ inode = afr_inode_find (this, gfid);
+ if (!inode)
+ goto out;
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
- }
+ frame = afr_frame_create (this);
+ if (!frame)
+ goto out;
- if (!child->path) {
- goto out;
- }
+ ret = afr_selfheal_unlocked_inspect (frame, this, inode, gfid,
+ &data_selfheal,
+ &metadata_selfheal,
+ &entry_selfheal);
+ if (ret)
+ goto out;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ if (data_selfheal)
+ afr_selfheal_data (frame, this, inode);
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ if (metadata_selfheal)
+ afr_selfheal_metadata (frame, this, inode);
- if (!child->inode) {
- ret = -1;
- goto out;
- }
- uuid_copy (child->gfid, gfid);
+ if (entry_selfheal)
+ afr_selfheal_entry (frame, this, inode);
- ret = 0;
+ inode_forget (inode, 1);
out:
- if (ret == -1)
- loc_wipe (child);
+ if (inode)
+ inode_unref (inode);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
deleted file mode 100644
index da02bfdcf..000000000
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __AFR_SELF_HEAL_COMMON_H__
-#define __AFR_SELF_HEAL_COMMON_H__
-
-#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
- AFR_LOOKUP_FAIL_CONFLICTS = 1,
- AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
-} afr_lookup_flags_t;
-
-int
-afr_sh_select_source (int sources[], int child_count);
-
-int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
-afr_sh_source_count (int sources[], int child_count);
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
-
-int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count);
-
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type);
-
-int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name);
-
-int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
-
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size);
-
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type);
-
-int
-afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type);
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
-
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc);
-
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
- int32_t flags);
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf);
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk);
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent);
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk);
-afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
-typedef int
-(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
-int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name,
- uuid_t gfid);
-int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, int ret_child, mode_t entry_mode,
- call_frame_t **impunge_frame);
-#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index fd351e976..c0548d995 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,1372 +1,635 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-
-extern int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk);
-
-int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr);
-
-int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "flush failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
- gf_log (this->name, GF_LOG_DEBUG,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
-
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_finish (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- GF_ASSERT (0);
- afr_sh_data_finish (frame, this);
- return 0;
- }
+#include "byte-order.h"
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
+enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+};
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-int
-afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
+#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
+static int
+__checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, uint32_t weak, uint8_t *strong,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
+ afr_local_t *local = NULL;
+ int i = (long) cookie;
- GF_ASSERT (sh->source == child_index);
- if (op_ret != -1)
- sh->buf[child_index] = *buf;
- afr_sh_data_setattr (frame, this);
+ local = frame->local;
- return 0;
-}
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (strong)
+ memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
-/*
- * If there are any writes after the self-heal is triggered then the
- * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
- * stat on the source and then set the [am]times
- */
-int
-afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->fstat,
- sh->healing_fd);
- return 0;
+ syncbarrier_wake (&local->barrier);
+ return 0;
}
-//Fun fact, lock_cbk is being used for both lock & unlock
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->data_lock_held);
- sh->data_lock_held = _gf_false;
- int_lock->lock_cbk = lock_cbk;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
+static int
+attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ int i = (long) cookie;
+ afr_local_t *local = NULL;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing data selfheal of %s", local->loc.path);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref (xdata);
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
+ syncbarrier_wake (&local->barrier);
- return 0;
+ return 0;
}
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing failed data selfheal of %s", local->loc.path);
-
- sh->op_failed = 1;
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
- return 0;
-}
-
-int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+static gf_boolean_t
+__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int source,
+ unsigned char *healed_sinks,
+ off_t offset, size_t size)
{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local = frame->local;
- sh = &local->self_heal;
- if (!IA_ISREG (sh->type)) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
- if (NULL == sh->old_loop_frame) {
- GF_ASSERT (sh->data_lock_held);
- afr_sh_data_fxattrop (frame, this,
- afr_post_sh_data_fxattrop_cbk);
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ int i = 0;
- afr_sh_data_lock (frame, this, 0, 0,
- afr_post_sh_big_lock_success,
- afr_post_sh_big_lock_failure);
- }
-out:
- return 0;
-}
+ priv = this->private;
+ local = frame->local;
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
- gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %"PRIu64,
- frame->root->lk_owner);
- afr_sh_print_pending_matrix (sh->delta_matrix, this);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
-
- GF_ASSERT (call_count);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
+ wind_subvols = alloca0 (priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
+ }
+ AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
+ offset, size, NULL);
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
-{
- int i = 0;
+ if (!local->replies[source].valid || local->replies[source].op_ret != 0)
+ return _gf_false;
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
- }
-
- i++;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+ if (memcmp (local->replies[source].checksum,
+ local->replies[i].checksum,
+ MD5_DIGEST_LENGTH))
+ return _gf_false;
+ }
- return NULL;
+ return _gf_true;
}
static int
-sh_zero_byte_files_exist (afr_self_heal_t *sh, int child_count)
-{
- int i;
- int ret = 0;
-
- for (i = 0; i < child_count; i++) {
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
- }
- }
-
- return ret;
+__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct afr_reply *replies)
+{
+ struct iovec *iovec = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = syncop_readv (priv->children[source], fd, size, offset, 0,
+ &iovec, &count, &iobref);
+ if (ret <= 0)
+ return ret;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+
+ /*
+ * TODO: Use fiemap() and discard() to heal holes
+ * in the future.
+ *
+ * For now,
+ *
+ * - if the source had any holes at all,
+ * AND
+ * - if we are writing past the original file size
+ * of the sink
+ * AND
+ * - is NOT the last block of the source file. if
+ * the block contains EOF, it has to be written
+ * in order to set the file size even if the
+ * last block is 0-filled.
+ * AND
+ * - if the read buffer is filled with only 0's
+ *
+ * then, skip writing to this source. We don't depend
+ * on the write to happen to update the size as we
+ * have performed an ftruncate() upfront anyways.
+ */
+#define is_last_block(o,b,s) ((s >= o) && (s <= (o + b)))
+ if (HAS_HOLES ((&replies[source].poststat)) &&
+ offset >= replies[i].poststat.ia_size &&
+ !is_last_block (offset, size,
+ replies[source].poststat.ia_size) &&
+ (iov_0filled (iovec, count) == 0))
+ continue;
+
+ ret = syncop_writev (priv->children[i], fd, iovec, count,
+ offset, iobref, 0);
+ if (ret != iov_length (iovec, count)) {
+ /* write() failed on this sink. unset the corresponding
+ member in sinks[] (which is healed_sinks[] in the
+ caller) so that this server does NOT get considered
+ as successfully healed.
+ */
+ healed_sinks[i] = 0;
+ }
+ }
+ if (iobref)
+ iobref_unref (iobref);
+
+ return ret;
}
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
-
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
-
- if ((local->enoent_count != 0)
- || sh_zero_byte_files_exist (sh, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size *
- this->ctx->page_size))) {
-
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
-
- algo = sh_algo_from_name (this, "full");
-
- } else {
- algo = sh_algo_from_name (this, "diff");
- }
- }
-
- return algo;
+static int
+afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks, off_t offset,
+ size_t size, int type, struct afr_reply *replies)
+{
+ int ret = -1;
+ int sink_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *data_lock = NULL;
+
+ priv = this->private;
+ sink_count = AFR_COUNT (healed_sinks, priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+ offset, size, data_lock);
+ {
+ if (ret < sink_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ if (type == AFR_SELFHEAL_DATA_DIFF &&
+ __afr_selfheal_data_checksums_match (frame, this, fd, source,
+ healed_sinks, offset, size)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_read_write (frame, this, fd, source,
+ healed_sinks, offset, size,
+ replies);
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
+ offset, size, data_lock);
+ return ret;
}
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct afr_sh_algorithm *sh_algo = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->algo_completion_cbk = afr_sh_data_erase_pending;
- sh->algo_abort_cbk = afr_sh_data_fail;
- sh_algo = afr_sh_data_pick_algo (frame, this);
-
- sh_algo->fn (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+static int
+afr_selfheal_data_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_INFO,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0)
- afr_sh_data_sync_prepare (frame, this);
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, fsync, fd, 0, NULL);
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
+ loc_t loc = {0, };
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ AFR_ONLIST (healed_sinks, frame, attr_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME), NULL);
- sources = sh->sources;
- call_count = sh->active_sinks;
+ loc_wipe (&loc);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-int
-afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
- sh->source = afr_sh_select_source (sh->sources, priv->child_count);
- if (sh->source < 0) {
- ret = -1;
- goto out;
- }
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-out:
- return ret;
+ return 0;
}
-int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+static int
+afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %"PRIu64,
- frame->root->lk_owner);
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to "
- "resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- source = sh->source;
- sh->block_size = this->ctx->page_size;
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- if (sh->background && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
- sh->unwound = _gf_true;
- }
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[sh->source]->name,
- sh->active_sinks);
- afr_sh_data_trim_sinks (frame, this);
-
- return 0;
-}
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int i = 0;
-static void
-afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)
-{
- int i = 0;
- GF_ASSERT (child_count > 0);
- if (pending_matrix) {
- for (i = 0; i < child_count; i++) {
- if (pending_matrix[i])
- GF_FREE (pending_matrix[i]);
+ if (priv->data_self_heal_algorithm == NULL) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
+ type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
- GF_FREE (pending_matrix);
+ } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) {
+ type = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) {
+ type = AFR_SELFHEAL_DATA_DIFF;
}
+ return type;
}
-static int32_t**
-afr_create_pending_matrix (int32_t child_count)
-{
- gf_boolean_t cleanup = _gf_false;
- int32_t **pending_matrix = NULL;
- int i = 0;
-
- GF_ASSERT (child_count > 0);
-
- pending_matrix = GF_CALLOC (sizeof (*pending_matrix), child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix)
- goto out;
- for (i = 0; i < child_count; i++) {
- pending_matrix[i] = GF_CALLOC (sizeof (**pending_matrix),
- child_count,
- gf_afr_mt_int32_t);
- if (NULL == pending_matrix[i]) {
- cleanup = _gf_true;
- goto out;
- }
- }
-out:
- if (_gf_true == cleanup) {
- afr_destroy_pending_matrix (pending_matrix, child_count);
- pending_matrix = NULL;
- }
- return pending_matrix;
-}
+static int
+afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ off_t off = 0;
+ size_t block = 128 * 1024;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+
+ priv = this->private;
+
+ sinks_str = alloca0 (priv->child_count * 8);
+ p = sinks_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ p += sprintf (p, "%d ", i);
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "performing data selfheal on %s. "
+ "source=%d sinks=%s",
+ uuid_utoa (fd->inode->gfid), source, sinks_str);
+
+ type = afr_data_self_heal_type_get (priv, healed_sinks, source,
+ replies);
+
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ ret = afr_selfheal_data_block (iter_frame, this, fd, source,
+ healed_sinks, off, block, type,
+ replies);
+ if (ret < 0)
+ goto out;
+
+ AFR_STACK_RESET (iter_frame);
+ }
+
+ afr_selfheal_data_restore_time (frame, this, fd->inode, source,
+ healed_sinks, replies);
+
+ ret = afr_selfheal_data_fsync (frame, this, fd, healed_sinks);
-int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type)
-{
- afr_private_t *priv = NULL;
- int read_child = -1;
- int32_t **pending_matrix = NULL;
- int32_t *sources = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int32_t nsources = 0;
- int32_t prev_read_child = -1;
- int32_t config_read_child = -1;
-
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
-
- pending_matrix = afr_create_pending_matrix (priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
-
- nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type);
- if (nsources < 0)
- goto out;
-
- prev_read_child = local->read_child_index;
- config_read_child = priv->read_child;
- read_child = afr_select_read_child_from_policy (success_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- sources);
out:
- afr_destroy_pending_matrix (pending_matrix, priv->child_count);
- gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
- read_child);
- return read_child;
-}
-
-int
-afr_sh_data_special_file_fix (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count ; i++)
- if (1 == local->child_up[i])
- sh->success[i] = 1;
-
- afr_sh_data_erase_pending (frame, this);
-
- return 0;
+ if (iter_frame)
+ AFR_STACK_DESTROY (iter_frame);
+ return ret;
}
-int
-afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fstat of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- /* Previous versions of glusterfs might have set
- * the pending data xattrs which need to be erased
- */
- if (IA_ISREG (buf->ia_type))
- afr_sh_data_fix (frame, this);
- else
- afr_sh_data_special_file_fix (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, unsigned char *healed_sinks,
+ struct afr_reply *replies, uint64_t size)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *larger_sinks = 0;
+ int i = 0;
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ local = frame->local;
+ priv = this->private;
- local->call_count = call_count;
+ larger_sinks = alloca0 (priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && replies[i].poststat.ia_size > size)
+ larger_sinks[i] = 1;
+ }
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fstat,
- sh->healing_fd);
+ AFR_ONLIST (larger_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-void
-afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fxattrop of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- int call_count = -1;
- int ret = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- local = frame->local;
- sh = &local->self_heal;
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- (void) afr_build_sources (this, sh->xattr, NULL,
- sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret)
- afr_sh_data_fail (frame, this);
- else
- afr_sh_set_timestamps (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- int call_count = -1;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_sh_data_fstat (frame, this);
- }
-
- return 0;
+/*
+ * If by chance there are multiple sources with differing sizes, select
+ * the largest file as the source.
+ *
+ * This can only happen if data was directly modified in the backend.
+ */
+static int
+__afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t size = 0;
+ int source = -1;
+ int locked_count = 0;
+ int sources_count = 0;
+ int healed_sinks_count = 0;
+
+ priv = this->private;
+
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ healed_sinks_count = AFR_COUNT (healed_sinks, priv->child_count);
+
+ if (locked_count == healed_sinks_count || !sources_count) {
+ /* split brain */
+ return -EIO;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (size <= replies[i].poststat.ia_size) {
+ size = replies[i].poststat.ia_size;
+ source = i;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (replies[i].poststat.ia_size < size) {
+ sources[i] = 0;
+ sinks[i] = 1;
+ }
+ }
+
+ return source;
}
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
- afr_fxattrop_cbk_t fxattrop_cbk)
+/*
+ * __afr_selfheal_data_prepare:
+ *
+ * This function inspects the on-disk xattrs and determines which subvols
+ * are sources and sinks.
+ *
+ * The return value is the index of the subvolume to be used as the source
+ * for self-healing, or -1 if no healing is necessary/split brain.
+ */
+static int
+__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int32_t *zero_pending = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ replies);
+ if (ret)
+ return ret;
- local->call_count = call_count;
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_DATA_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
+ source = __afr_selfheal_data_finalize_source (this, sources, sinks,
+ healed_sinks, locked_on,
+ replies);
+ if (source < 0)
+ return -EIO;
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
- afr_reset_xattr (sh->xattr, priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, fxattrop_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req);
-
- if (!--call_count)
- break;
- }
- }
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_sh_data_fxattrop_cbk);
- return 0;
+ return source;
}
-int
-afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_failure_handler (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
- "done for %s by %"PRIu64". Proceding to self-heal",
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_success_handler (frame, this);
- }
- return 0;
+static int
+__afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t compat = _gf_false;
+ unsigned char *compat_lock = NULL;
+
+ priv = this->private;
+
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+ compat_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_prepare (frame, this, fd, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies);
+ if (ret < 0)
+ goto unlock;
+
+ source = ret;
+
+ ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
+ locked_replies,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = 0;
+
+ /* Locking from (LLONG_MAX - 2) to (LLONG_MAX - 1) is for
+ compatibility with older self-heal clients which do not
+ hold a lock in the @priv->sh_domain domain to guard
+ against concurrent ongoing self-heals
+ */
+ afr_selfheal_inodelk (frame, this, fd->inode, this->name,
+ LLONG_MAX - 2, 1, compat_lock);
+ compat = _gf_true;
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks,
+ healed_sinks, AFR_DATA_TRANSACTION,
+ locked_replies, data_lock);
+out:
+ if (compat)
+ afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
+ LLONG_MAX - 2, 1, compat_lock);
+ return ret;
}
-int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "failed for %s. by %"PRIu64,
- local->loc.path, frame->root->lk_owner);
- int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s by %"PRIu64". Proceeding to self-heal",
- local->loc.path, frame->root->lk_owner);
- sh->data_lock_success_handler (frame, this);
- }
-
- return 0;
-}
-int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
+static fd_t *
+afr_selfheal_data_open (xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
+ loc_t loc = {0,};
+ int ret = 0;
+ fd_t *fd = NULL;
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
+ fd = fd_create (inode, 0);
+ if (!fd)
+ return NULL;
- afr_set_lock_number (frame, this);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd);
+ if (ret) {
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
+ }
- afr_nonblocking_inodelk (frame, this);
+ loc_wipe (&loc);
- return 0;
+ return fd;
}
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this, afr_post_sh_data_fxattrop_cbk);
- return 0;
-}
int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ fd_t *fd = NULL;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_set_timestamps (frame, this);
- return 0;
-}
+ fd = afr_selfheal_data_open (this, inode);
+ if (!fd)
+ return -EIO;
+ locked_on = alloca0 (priv->child_count);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
+ ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ ret = __afr_selfheal_data (frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
- sh->data_lock_success_handler = success_handler;
- sh->data_lock_failure_handler = failure_handler;
- return afr_sh_data_lock_rec (frame, this, start, len);
-}
-
-int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_fail (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- afr_sh_data_lock (frame, this, 0, 0,
- afr_sh_data_big_lock_success,
- afr_sh_data_fail);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- int call_count = 0;
- fd_t *fd = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(!local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_RDWR|O_LARGEFILE, fd, 0);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-
-int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
- afr_sh_data_open (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
+ if (fd)
+ fd_unref (fd);
- return 0;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index f87de0c1f..9605d69f4 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,2211 +1,631 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
-#include "inode.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include "afr-self-heal.h"
#include "byte-order.h"
-
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
- do {\
- _local = _frame->local;\
- _sh = &_local->self_heal;\
- _sh_frame = _sh->sh_frame;\
- _sh_local = _sh_frame->local;\
- _sh_sh = &_sh_local->self_heal;\
- } while (0);
-
-int
-afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_entry_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
-
- afr_sh_entry_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- long i = 0;
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *orig_local = NULL;
- call_frame_t *orig_frame = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
-
- afr_children_add_child (sh->fresh_children, i, priv->child_count);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to erase pending xattrs on %s (%s)",
- local->loc.path, priv->children[i]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->source == -1) {
- //this happens if the forced merge option is set
- read_child = sh->fresh_children[0];
- } else {
- read_child = sh->source;
- }
- afr_inode_set_read_ctx (this, sh->inode, read_child,
- sh->fresh_children);
- orig_frame = sh->orig_frame;
- orig_local = orig_frame->local;
-
- if (sh->source != -1) {
- orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
- }
-
- afr_sh_entry_finish (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->entries_skipped) {
- need_unwind = 1;
- sh->op_failed = _gf_true;
- goto out;
- }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
-out:
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
-
- return 0;
-}
-
-
-
-static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
-}
-
static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = (long) cookie;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setattr on parent directory of %s on subvolume %s failed: %s",
- expunge_local->loc.path,
- priv->children[active_src]->name, strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int32_t valid = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- afr_build_parent_loc (&expunge_sh->parent_loc, &expunge_local->loc);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->setattr,
- &expunge_sh->parent_loc,
- &expunge_sh->parentbuf,
- valid);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "expunging directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int type = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
- break;
- case IA_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
-
- return 0;
-out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, -1, EINVAL);
+afr_selfheal_entry_delete (call_frame_t *frame, xlator_t *this, inode_t *dir,
+ const char *name, inode_t *inode, int child,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+ char g[64];
+
+ priv = this->private;
+
+ subvol = priv->children[child];
+
+ loc.parent = inode_ref (dir);
+ uuid_copy (loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
+
+ if (replies[child].valid && replies[child].op_ret == 0) {
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_log (this->name, GF_LOG_WARNING,
+ "expunging dir %s/%s (%s) on %s",
+ uuid_utoa (dir->gfid), name,
+ uuid_utoa_r (replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir (subvol, &loc, 1);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "expunging file %s/%s (%s) on %s",
+ uuid_utoa (dir->gfid), name,
+ uuid_utoa_r (replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink (subvol, &loc);
+ break;
+ }
+ }
+
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+
+int
+afr_selfheal_recreate_entry (call_frame_t *frame, xlator_t *this, int dst,
+ int source, inode_t *dir, const char *name,
+ inode_t *inode, struct afr_reply *replies)
+{
+ int ret = 0;
+ loc_t loc = {0,};
+ loc_t srcloc = {0,};
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {0,};
+
+ priv = this->private;
+
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
+
+ loc.parent = inode_ref (dir);
+ uuid_copy (loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref (inode);
+
+ ret = afr_selfheal_entry_delete (frame, this, dir, name, inode, dst,
+ replies);
+ if (ret)
+ goto out;
+
+ ret = dict_set_static_bin (xdata, "gfid-req",
+ replies[source].poststat.ia_gfid, 16);
+ if (ret)
+ goto out;
+
+ iatt = &replies[source].poststat;
+
+ srcloc.inode = inode_ref (inode);
+ uuid_copy (srcloc.gfid, iatt->ia_gfid);
+
+ mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
+
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir (priv->children[dst], &loc, mode, xdata, 0);
+ break;
+ case IA_IFLNK:
+ ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == 0) {
+ ret = syncop_link (priv->children[dst], &srcloc, &loc);
+ } else {
+ ret = syncop_readlink (priv->children[source], &srcloc,
+ &linkname, 4096);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink (priv->children[dst], &loc, linkname,
+ xdata, NULL);
+ }
+ break;
+ default:
+ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = syncop_mknod (priv->children[dst], &loc, mode,
+ iatt->ia_rdev, xdata, &newent);
+ if (ret == 0 && iatt->ia_size && !newent.ia_size) {
+ /* New entry created. Mark @dst pending on all sources */
+ ret = 1;
+ }
+ break;
+ }
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
-
- return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
+ if (xdata)
+ dict_unref (xdata);
+ loc_wipe (&loc);
+ loc_wipe (&srcloc);
+ return ret;
}
-int
-afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+static int
+afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
+ int idx = 0;
- priv = this->private;
- expunge_local = expunge_frame->local;
+ priv = this->private;
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
+ uuid_copy (inode->gfid, replies[source].poststat.ia_gfid);
- return 0;
-}
-
-int
-afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int need_expunge = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1 && op_errno == ENOENT)
- need_expunge = 1;
- else if (op_ret == -1)
- goto out;
-
- if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
- !uuid_is_null (buf->ia_gfid) &&
- (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
- char uuidbuf1[64];
- char uuidbuf2[64];
- gf_log (this->name, GF_LOG_DEBUG,
- "entry %s found on %s with mismatching gfid (%s/%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
- uuid_utoa_r (buf->ia_gfid, uuidbuf2));
- need_expunge = 1;
- }
-
- if (need_expunge) {
- gf_log (this->name, GF_LOG_INFO,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
-
- if (postparent)
- expunge_sh->parentbuf = *postparent;
-
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
-
-out:
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
-int
-afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
- char *name = NULL;
- int op_ret = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
- sh->expunge_done = afr_sh_entry_expunge_entry_done;
-
- name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
- expunge_sh->entrybuf = entry->d_stat;
-
- ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
- name, entry->d_stat.ia_gfid);
- if (ret != 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
-
- ret = 0;
-out:
- if (ret == -1)
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!newentry[i])
+ continue;
+ changelog[i][idx] = hton32(1);
+ }
- return 0;
-}
+ afr_set_pending_dict (priv, xattr, changelog);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ afr_selfheal_post_op (frame, this, inode, i, xattr);
+ }
-int
-afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry);
- }
-
- return 0;
+ dict_unref (xattr);
+ return ret;
}
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
-
- return 0;
+static int
+__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *newentry = NULL;
+
+ priv = this->private;
+ newentry = alloca0 (priv->child_count);
+
+ if (!replies[source].valid)
+ return -EIO;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete (frame, this, fd->inode,
+ name, inode, i, replies);
+ } else {
+ if (!uuid_compare (replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
+
+ ret = afr_selfheal_recreate_entry (frame, this, i, source,
+ fd->inode, name, inode,
+ replies);
+ if (ret > 0) {
+ newentry[i] = 1;
+ ret = 0;
+ }
+ }
+ if (ret < 0)
+ break;
+ }
+
+ if (AFR_COUNT (newentry, priv->child_count))
+ afr_selfheal_newentry_mark (frame, this, inode, source, replies,
+ sources, newentry);
+ return ret;
}
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int source = -1;
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
+ priv = this->private;
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ source = i;
+ break;
+ }
+ }
- if (sh->op_failed) {
- goto out;
- }
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
+ return 0;
+ }
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+ if (replies[i].op_errno != ENOENT)
+ continue;
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-out:
- afr_sh_entry_impunge_all (frame, this);
- return 0;
+ ret = afr_selfheal_recreate_entry (frame, this, i, source,
+ fd->inode, name, inode,
+ replies);
+ }
+ return ret;
}
-int
-afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-void
-afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int32_t impunge_ret_child = 0;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
-
- impunge_ret_child = impunge_sh->impunge_ret_child;
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, impunge_ret_child, op_ret,
- op_errno);
-}
+ int ret = -1;
-int
-afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "setattr done for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "setattr (%s) on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ if (source < 0)
+ ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode,
+ sources, healed_sinks,
+ locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+ return ret;
}
-int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
- struct iatt stbuf = {0};
- int32_t valid = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to perform xattrop on %s (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.ia_uid,
- impunge_local->cont.lookup.buf.ia_gid);
-
- stbuf.ia_atime = impunge_local->cont.lookup.buf.ia_atime;
- stbuf.ia_atime_nsec = impunge_local->cont.lookup.buf.ia_atime_nsec;
- stbuf.ia_mtime = impunge_local->cont.lookup.buf.ia_mtime;
- stbuf.ia_mtime_nsec = impunge_local->cont.lookup.buf.ia_mtime_nsec;
-
- stbuf.ia_uid = impunge_local->cont.lookup.buf.ia_uid;
- stbuf.ia_gid = impunge_local->cont.lookup.buf.ia_gid;
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &impunge_local->loc,
- &stbuf, valid);
- return 0;
+static int
+afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, char *name)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *locked_on = NULL;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+
+ locked_on = alloca0 (priv->child_count);
+
+ replies = alloca0 (priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name,
+ name, locked_on);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name,
+ replies, locked_on);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode,
+ source, sources, healed_sinks,
+ locked_on, replies);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, name,
+ locked_on);
+ if (inode)
+ inode_unref (inode);
+ return ret;
}
-int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- loc_t *parent_loc = cookie;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- parent_loc->path, strerror (op_errno));
- }
-
- loc_wipe (parent_loc);
-
- GF_FREE (parent_loc);
-
- AFR_STACK_DESTROY (setattr_frame);
- return 0;
-}
-
-int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent)
+static int
+afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int child, int source, unsigned char *sources,
+ unsigned char *healed_sinks)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int child_index = 0;
- int32_t *pending_array = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- loc_t *parent_loc = NULL;
- struct iatt parentbuf = {0,};
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- inode->ia_type = stbuf->ia_type;
-
- xattr = dict_new ();
- if (!xattr) {
- ret = -1;
- goto out;
- }
-
- pending_array = (int32_t*) GF_CALLOC (3, sizeof (*pending_array),
- gf_afr_mt_int32_t);
-
- if (!pending_array) {
- ret = -1;
- goto out;
- }
-
- /* Pending data xattrs shouldn't be set for special files
- */
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (IA_ISDIR (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else if (IA_ISREG (stbuf->ia_type))
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- else
- goto cont;
- pending_array[idx] = hton32 (1);
-
-cont:
- ret = dict_set_dynptr (xattr, priv->pending_key[child_index],
- pending_array,
- 3 * sizeof (*pending_array));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- } else {
- pending_array = NULL;
- }
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parentbuf = impunge_sh->parentbuf;
- setattr_frame = copy_frame (impunge_frame);
-
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- if (!parent_loc) {
- ret = -1;
- goto out;
- }
- afr_build_parent_loc (parent_loc, &impunge_local->loc);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
-
- STACK_WIND_COOKIE (setattr_frame, afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &parentbuf, valid);
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
-out:
- if (xattr)
- dict_unref (xattr);
-
- if (ret) {
- if (pending_array)
- GF_FREE (pending_array);
+ priv = this->private;
+ subvol = priv->children[child];
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ INIT_LIST_HEAD (&entries.list);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- }
-
- return 0;
-}
+ iter_frame = afr_copy_frame (frame);
+ if (!iter_frame)
+ return -ENOMEM;
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
-int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), dict);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-
-
-int
-afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
-
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return 0;
- }
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- dict);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
-int
-afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- struct iatt *buf = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- buf = &impunge_local->cont.symlink.buf;
-
- dict = dict_new ();
- if (!dict) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, ENOMEM);
- goto out;
- }
-
- GF_ASSERT (!uuid_is_null (buf->ia_gfid));
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "%s: dict set gfid failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, dict);
-
- if (dict)
- dict_unref (dict);
-out:
- return 0;
-}
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR))
+ continue;
+ ret = afr_selfheal_entry_dirent (iter_frame, this, fd,
+ source, sources,
+ healed_sinks,
+ entry->d_name);
+ AFR_STACK_RESET (iter_frame);
-int
-afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- impunge_sh->linkname);
-
- return 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ if (ret)
+ break;
+ }
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
- return 0;
+ AFR_STACK_DESTROY (iter_frame);
+ return ret;
}
-
-int
-afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static int
+afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking symlink %s with wrong target on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ priv = this->private;
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->unlink,
- &impunge_local->loc);
+ gf_log (this->name, GF_LOG_INFO, "performing entry selfheal on %s",
+ uuid_utoa (fd->inode->gfid));
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && !healed_sinks[i])
+ continue;
+ ret = afr_selfheal_entry_do_subvol (frame, this, fd, i, source,
+ sources, healed_sinks);
+ if (ret)
+ break;
+ }
+ return ret;
}
-int
-afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+static int
+__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- /* symlink doesn't exist on the sink */
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- afr_sh_entry_impunge_symlink (impunge_frame, this,
- child_index, impunge_sh->linkname);
- return 0;
- }
-
-
- /* symlink exists on the sink, so check if targets match */
-
- if (strcmp (linkname, impunge_sh->linkname) == 0) {
- /* targets match, nothing to do */
-
- goto out;
- } else {
- /*
- * Hah! Sneaky wolf in sheep's clothing!
- */
- afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
- child_index);
- return 0;
- }
-
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int locked_count = 0;
+ int sources_count = 0;
+ int sinks_count = 0;
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
+ priv = this->private;
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ sinks_count = AFR_COUNT (sinks, priv->child_count);
- priv = this->private;
- impunge_local = impunge_frame->local;
+ if (locked_count == sinks_count || !sources_count) {
+ return -1;
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "checking symlink target of %s on %s",
- impunge_local->loc.path, priv->children[child_index]->name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
+ return source;
}
-int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf)
+static int
+__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->linkname = gf_strdup (linkname);
- afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
-
- return 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
-}
+ priv = this->private;
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *postparent)
-{
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int ret = 0;
- int active_src = 0;
-
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->parentbuf = *postparent;
- active_src = impunge_sh->active_source;
- impunge_local->cont.lookup.buf = *buf;
- afr_update_loc_gfids (&impunge_local->loc, buf, postparent);
-
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- ret = -1;
- break;
- }
-
- return ret;
-}
+ ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid,
+ replies);
+ if (ret)
+ return ret;
-gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child, unsigned int child_count)
-{
- int32_t *success_children = NULL;
- gf_boolean_t recreate = _gf_false;
-
- GF_ASSERT (impunge_sh->impunging_entry_mode);
- GF_ASSERT (impunge_sh->child_errno);
- GF_ASSERT (sources);
-
- success_children = impunge_sh->success_children;
- if (sources[child] || (child == impunge_sh->active_source)) {
- GF_ASSERT (afr_is_child_present (success_children,
- child_count, child));
- goto out;
- }
-
- if (IA_ISLNK (impunge_sh->impunging_entry_mode)) {
- recreate = _gf_true;
- goto out;
- }
-
- if (impunge_sh->child_errno[child] == ENOENT)
- recreate = _gf_true;
-out:
- return recreate;
-}
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
-unsigned int
-afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child_count)
-{
- int count = 0;
- int i = 0;
+ source = __afr_selfheal_entry_finalize_source (this, sources, sinks,
+ locked_on, replies);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
- for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sources, i, child_count))
- count++;
- }
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
- return count;
-}
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
-int
-afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- unsigned int recreate_count = 0;
- int i = 0;
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- buf = &impunge_sh->buf[active_src];
- postparent = &impunge_sh->parentbufs[active_src];
-
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- GF_ASSERT (recreate_count);
- impunge_local->call_count = recreate_count;
- for (i = 0; i < priv->child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sh->sources, i,
- priv->child_count)) {
- (void)afr_sh_entry_impunge_create (impunge_frame, this,
- i, buf,
- postparent);
- recreate_count--;
- }
- }
- GF_ASSERT (!recreate_count);
- return 0;
+ return ret;
}
-void
-afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int recreate_count = 0;
- unsigned int gfid_miss_count = 0;
- unsigned int children_up_count = 0;
- uuid_t gfid = {0};
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
-
- if (op_ret < 0)
- goto done;
- if (impunge_sh->child_errno[active_src]) {
- op_ret = -1;
- op_errno = impunge_sh->child_errno[active_src];
- goto done;
- }
-
- gfid_miss_count = afr_gfid_missing_count (this->name,
- impunge_sh->success_children,
- impunge_sh->buf, priv->child_count,
- impunge_local->loc.path);
- children_up_count = afr_up_children_count (impunge_local->child_up,
- priv->child_count);
- if ((gfid_miss_count == children_up_count) &&
- (children_up_count < priv->child_count)) {
- op_ret = -1;
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
- "gfid should not be assigned in this state for %s",
- impunge_local->loc.path);
- goto done;
- }
-
- if (gfid_miss_count) {
- afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
- impunge_sh->success_children,
- priv->child_count);
- if (uuid_is_null (gfid)) {
- sh->entries_skipped = _gf_true;
- gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
- "self-heal because of gfid absence",
- impunge_local->loc.path);
- goto done;
- }
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, gfid,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- } else {
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- if (!recreate_count) {
- op_ret = 0;
- op_errno = 0;
- goto done;
- }
- afr_sh_entry_call_impunge_recreate (impunge_frame, this);
- }
- return;
-done:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
- return;
-}
-int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- int active_src = 0;
- int op_errno = 0;
- int op_ret = -1;
- mode_t entry_mode = 0;
-
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- sh->impunge_done = afr_sh_entry_impunge_entry_done;
-
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)
- || ((strcmp (local->loc.path, "/") == 0)
- && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- entry->d_name, local->loc.path);
-
- entry_mode = st_mode_from_ia (entry->d_stat.ia_prot,
- entry->d_stat.ia_type);
- ret = afr_impunge_frame_create (frame, this, active_src, active_src,
- entry_mode, &impunge_frame);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
-
- impunge_local = impunge_frame->local;
- ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name, entry->d_stat.ia_gfid);
- if (ret != 0) {
- op_errno = ENOMEM;
- goto out;
- }
-
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS);
-
- op_ret = 0;
+static int
+__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+
+ priv = this->private;
+
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare (frame, this, fd, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies, &source);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_entry_do (frame, this, fd, source, sources,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto out;
+
+ ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks,
+ healed_sinks, AFR_ENTRY_TRANSACTION,
+ locked_replies, data_lock);
out:
- if (ret) {
- if (impunge_frame)
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, active_src, op_ret, op_errno);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry);
- }
-
- return 0;
+ return ret;
}
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+static fd_t *
+afr_selfheal_data_opendir (xlator_t *this, inode_t *inode)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
+ loc_t loc = {0,};
+ int ret = 0;
+ fd_t *fd = NULL;
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_erase_pending (frame, this);
- return 0;
- }
+ fd = fd_create (inode, 0);
+ if (!fd)
+ return NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
- afr_sh_entry_impunge_subvol (frame, this, active_src);
-
- return 0;
-}
+ ret = syncop_opendir (this, &loc, fd);
+ if (ret) {
+ fd_unref (fd);
+ fd = NULL;
+ } else {
+ fd_bind (fd);
+ }
+ loc_wipe (&loc);
-int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_expunge_all (frame, this);
- }
-
- return 0;
+ return fd;
}
-int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
-
- fd_t *fd = NULL;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = local->self_heal.source;
- sources = local->self_heal.sources;
-
- sh->block_size = 65536; //131072
- sh->offset = 0;
-
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
-
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
-
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd);
- call_count--;
- }
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (source != -1)
- sh->success[source] = 1;
-
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && sh->active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
-
- afr_sh_entry_open (frame, this);
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ priv = this->private;
-void
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int nsources = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_entry_finish (frame, this);
- goto out;
- }
-
- if (sh->forced_merge) {
- sh->source = -1;
- goto heal;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_entry_finish (frame, this);
- return;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- sh->source = source;
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- if (sh->source >= 0)
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
-heal:
- afr_sh_entry_sync_prepare (frame, this);
-out:
- return;
-}
-
-int
-afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_entry_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
- "for %s. Proceeding to FOP", local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_entry_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
+ fd = afr_selfheal_data_opendir (this, inode);
+ if (!fd)
+ return -EIO;
-int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ locked_on = alloca0 (priv->child_count);
+ ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
- priv = this->private;
- local = frame->local;
+ ret = __afr_selfheal_entry (frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on);
- if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entrylk (frame, this, &local->loc, NULL,
- afr_sh_post_nonblocking_entry_cbk);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
+ if (fd)
+ fd_unref (fd);
- return 0;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index c67748b2f..b31a33237 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,627 +1,281 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-
-int
-afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
-
- afr_reset_xattr (sh->xattr, priv->child_count);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split-brain detected, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_metadata_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_sh_inode_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
- }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
- afr_sh_metadata_finish (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->success[child_index] = 0;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+#include "byte-order.h"
- return 0;
-}
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE)
int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+afr_selfheal_metadata_do (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
-
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 2 calls per sink - setattr, setxattr
- */
- if (xattr)
- call_count = active_sinks * 2;
- else
- call_count = active_sinks;
-
- local->call_count = call_count;
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[source].ia_uid;
- stbuf.ia_gid = sh->buf[source].ia_gid;
-
- stbuf.ia_type = sh->buf[source].ia_type;
- stbuf.ia_prot = sh->buf[source].ia_prot;
-
- valid = GF_SET_ATTR_MODE |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
- call_count--;
- }
-
- return 0;
+ int ret = -1;
+ loc_t loc = {0,};
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+
+ gf_log (this->name, GF_LOG_INFO, "performing metadata selfheal on %s",
+ uuid_utoa (inode->gfid));
+
+ ret = syncop_getxattr (priv->children[source], &loc, &xattr, NULL);
+ if (ret < 0) {
+ loc_wipe (&loc);
+ return -EIO;
+ }
+
+ afr_filter_xattrs (xattr);
+ dict_del (xattr, GF_SELINUX_XATTR_KEY);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+
+ ret = syncop_setattr (priv->children[i], &loc,
+ &locked_replies[source].poststat,
+ AFR_HEAL_ATTR, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+
+ old_xattr = NULL;
+ ret = syncop_getxattr (priv->children[i], &loc, &old_xattr, 0);
+ if (old_xattr) {
+ dict_del (old_xattr, GF_SELINUX_XATTR_KEY);
+ afr_filter_xattrs (old_xattr);
+ ret = syncop_removexattr (priv->children[i], &loc, "",
+ old_xattr);
+ }
+
+ ret = syncop_setxattr (priv->children[i], &loc, xattr, 0);
+ if (ret)
+ healed_sinks[i] = 0;
+ }
+
+ loc_wipe (&loc);
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
}
-int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+/*
+ * Look for mismatching uid/gid or mode even if xattrs don't say so, and
+ * pick one arbitrarily as winner.
+ */
+
+static int
+__afr_selfheal_metadata_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int i;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
-
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
- afr_sh_metadata_sync (frame, this, xattr);
- }
-
- return 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt first = {0, };
+ int source = -1;
+ int locked_count = 0;
+ int sources_count = 0;
+ int sinks_count = 0;
+
+ priv = this->private;
+
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ sinks_count = AFR_COUNT (sinks, priv->child_count);
+
+ if (locked_count == sinks_count || !sources_count) {
+ if (!priv->metadata_splitbrain_forced_heal) {
+ return -EIO;
+ }
+ /* Metadata split brain, select one subvol
+ arbitrarily */
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i] && sinks[i]) {
+ sources[i] = 1;
+ sinks[i] = 0;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (source == -1) {
+ source = i;
+ first = replies[i].poststat;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!IA_EQUAL (first, replies[i].poststat, type) ||
+ !IA_EQUAL (first, replies[i].poststat, uid) ||
+ !IA_EQUAL (first, replies[i].poststat, gid) ||
+ !IA_EQUAL (first, replies[i].poststat, prot)) {
+ sources[i] = 0;
+ sinks[i] = 1;
+ }
+ }
+
+ return source;
}
-int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
-
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL);
-
- return 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid,
+ replies);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_METADATA_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
+
+ source = __afr_selfheal_metadata_finalize_source (this, sources, sinks,
+ locked_on, replies);
+ if (source < 0)
+ return -EIO;
+
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
+
+ return source;
}
-void
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_METADATA_TRANSACTION);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
- sh->source = source;
-
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
-
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
-
- if ((!IA_ISREG (sh->buf[source].ia_type)) &&
- (!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
-
- afr_sh_metadata_sync_prepare (frame, this);
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+
+ priv = this->private;
+
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, data_lock);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_metadata_prepare (frame, this, inode, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies);
+ if (ret < 0)
+ goto unlock;
+
+ source = ret;
+ ret = 0;
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, this->name,
+ LLONG_MAX -1, 0, data_lock);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_selfheal_metadata_do (frame, this, inode, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+
+ ret = afr_selfheal_undo_pending (frame, this, inode, sources, sinks,
+ healed_sinks, AFR_METADATA_TRANSACTION,
+ locked_replies, data_lock);
out:
- return;
+ return ret;
}
-int
-afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
- xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking metadata "
- "inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_ERROR, "Metadata self-heal "
- "failed for %s.", local->loc.path);
- afr_sh_metadata_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
- "inodelks done for %s. Proceeding to FOP",
- local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_metadata_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS);
- }
-
- return 0;
-}
int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
-
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = this->private;
-
-
- local = frame->local;
-
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ locked_on = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
+ {
+ if (ret < 2) {
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_metadata (frame, this, inode, locked_on);
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
+
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 000000000..ce80b8da3
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,457 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "afr.h"
+#include "afr-self-heal.h"
+
+
+int
+__afr_selfheal_assign_gfid (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ struct afr_reply *replies, int gfid_idx)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+
+ priv = this->private;
+
+ uuid_copy (parent->gfid, pargfid);
+
+ xdata = dict_new ();
+ if (!xdata) {
+ return -ENOMEM;
+ }
+
+ ret = dict_set_static_bin (xdata, "gfid-req",
+ replies[gfid_idx].poststat.ia_gfid, 16);
+ if (ret) {
+ dict_destroy (xdata);
+ return -ENOMEM;
+ }
+
+ loc.parent = inode_ref (parent);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.pargfid, pargfid);
+ loc.name = bname;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].op_ret == 0 || replies[i].op_errno != ENODATA)
+ continue;
+
+ ret = syncop_lookup (priv->children[i], &loc, xdata, 0, 0, 0);
+ }
+
+ loc_wipe (&loc);
+ dict_unref (xdata);
+
+ return ret;
+}
+
+
+int
+__afr_selfheal_name_impunge (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ struct afr_reply *replies, int gfid_idx)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ uuid_copy (parent->gfid, pargfid);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[gfid_idx].poststat.ia_gfid) == 0)
+ continue;
+
+ ret |= afr_selfheal_recreate_entry (frame, this, i, gfid_idx,
+ parent, bname, inode, replies);
+ }
+
+ return ret;
+}
+
+
+int
+__afr_selfheal_name_expunge (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ struct afr_reply *replies)
+{
+ loc_t loc = {0, };
+ int i = 0;
+ afr_private_t *priv = NULL;
+ char g[64];
+ int ret = 0;
+
+ priv = this->private;
+
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, pargfid);
+ loc.name = bname;
+ loc.inode = inode_ref (inode);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret)
+ continue;
+
+ switch (replies[i].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_log (this->name, GF_LOG_WARNING,
+ "expunging dir %s/%s (%s) on %s",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g),
+ priv->children[i]->name);
+ ret |= syncop_rmdir (priv->children[i], &loc, 1);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "expunging file %s/%s (%s) on %s",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g),
+ priv->children[i]->name);
+ ret |= syncop_unlink (priv->children[i], &loc);
+ break;
+ }
+ }
+
+ loc_wipe (&loc);
+
+ return ret;
+
+}
+
+
+int
+__afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uuid_t gfid = {0, };
+ int gfid_idx = -1;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ int first_idx = -1;
+ char g1[64],g2[64];
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
+
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
+ }
+
+ if (!need_heal)
+ return 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (!replies[i].op_ret && (source == -1 || sources[i])) {
+ source_is_empty = _gf_false;
+ break;
+ }
+ }
+
+ if (source_is_empty) {
+ return __afr_selfheal_name_expunge (frame, this, parent, pargfid,
+ bname, inode, replies);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (uuid_is_null (replies[i].poststat.ia_gfid))
+ continue;
+
+ if (uuid_is_null (gfid)) {
+ uuid_copy (gfid, replies[i].poststat.ia_gfid);
+ gfid_idx = i;
+ continue;
+ }
+
+ if (sources[i] || source == -1) {
+ if (gfid_idx != -1 &&
+ (sources[gfid_idx] || source == -1) &&
+ uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "GFID mismatch for <gfid:%s>/%s "
+ "%s on %s and %s on %s",
+ uuid_utoa (pargfid), bname,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ priv->children[i]->name,
+ uuid_utoa_r (replies[gfid_idx].poststat.ia_gfid, g2),
+ priv->children[gfid_idx]->name);
+ return -1;
+ }
+
+ uuid_copy (gfid, replies[i].poststat.ia_gfid);
+ gfid_idx = i;
+ continue;
+ }
+ }
+
+ if (gfid_idx == -1)
+ return -1;
+
+ __afr_selfheal_assign_gfid (frame, this, parent, pargfid, bname, inode,
+ replies, gfid_idx);
+
+ return __afr_selfheal_name_impunge (frame, this, parent, pargfid,
+ bname, inode, replies, gfid_idx);
+}
+
+
+int
+__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
+ unsigned char *sinks, unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int locked_count = 0;
+ int sources_count = 0;
+ int sinks_count = 0;
+
+ priv = this->private;
+
+ locked_count = AFR_COUNT (locked_on, priv->child_count);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ sinks_count = AFR_COUNT (sinks, priv->child_count);
+
+ if (locked_count == sinks_count || !sources_count) {
+ return -1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ return source;
+}
+
+
+int
+__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, struct afr_reply *replies,
+ int *source_p)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover (frame, parent, pargfid, replies);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks);
+ if (ret)
+ return ret;
+
+ source = __afr_selfheal_name_finalize_source (this, sources, sinks,
+ locked_on, replies);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+ for (i = 0; i < priv->child_count; i++)
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ healed_sinks[i] = sinks[i] && locked_on[i];
+
+ return ret;
+}
+
+
+int
+afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+
+ replies = alloca0 (priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < 2) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_prepare (frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, replies,
+ &source);
+ if (ret)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+ replies, locked_on);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do (frame, this, parent, pargfid, bname,
+ inode, sources, sinks, healed_sinks,
+ source, locked_on, replies);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, parent, this->name, bname,
+ locked_on);
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
+
+int
+afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ const char *bname, gf_boolean_t *need_heal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
+
+ priv = this->private;
+
+ replies = alloca0 (sizeof (*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on (frame, parent, bname,
+ replies, priv->child_up);
+ if (!inode)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ *need_heal = _gf_true;
+
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ *need_heal = _gf_true;
+ }
+
+ if (inode)
+ inode_unref (inode);
+ return 0;
+}
+
+int
+afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname)
+{
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t need_heal = _gf_false;
+
+ parent = afr_inode_find (this, pargfid);
+ if (!parent)
+ goto out;
+
+ frame = afr_frame_create (this);
+ if (!frame)
+ goto out;
+
+ ret = afr_selfheal_name_unlocked_inspect (frame, this, parent, pargfid,
+ bname, &need_heal);
+ if (ret)
+ goto out;
+
+ if (need_heal)
+ afr_selfheal_name_do (frame, this, parent, pargfid, bname);
+out:
+ if (parent)
+ inode_unref (parent);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index f40c06faa..a1b972ac3 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,58 +1,167 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEAL_H__
-#define __AFR_SELF_HEAL_H__
-#include <sys/stat.h>
+#ifndef _AFR_SELFHEAL_H
+#define _AFR_SELFHEAL_H
+
+
+/* Perform fop on all UP subvolumes and wait for all callbacks to return */
+
+#define AFR_ONALL(frame, rfn, fop, args ...) do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ \
+ afr_replies_wipe (__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) continue; \
+ STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ __count++; \
+ } \
+ syncbarrier_wait (&__local->barrier, __count); \
+ } while (0)
+
+
+/* Perform fop on all subvolumes represented by list[] array and wait
+ for all callbacks to return */
+
+#define AFR_ONLIST(list, frame, rfn, fop, args ...) do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ \
+ afr_replies_wipe (__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!list[__i]) continue; \
+ STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ __count++; \
+ } \
+ syncbarrier_wait (&__local->barrier, __count); \
+ } while (0)
+
+
+#define AFR_SEQ(frame, rfn, fop, args ...) do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ \
+ afr_replies_wipe (__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) continue; \
+ STACK_WIND_COOKIE (frame, rfn, (void *)(long) __i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ syncbarrier_wait (&__local->barrier, 1); \
+ } \
+ } while (0)
-#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
-#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
-#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
-#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
-#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
+#define ALLOC_MATRIX(n, type) ({type **__ptr = NULL; \
+ int __i; \
+ __ptr = alloca0 (n * sizeof(type *)); \
+ for (__i = 0; __i < n; __i++) __ptr[__i] = alloca0 (n * sizeof(type)); \
+ __ptr;})
+
+
+#define IA_EQUAL(f,s,field) (memcmp (&(f.ia_##field), &(s.ia_##field), sizeof (s.ia_##field)) == 0)
+
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal (xlator_t *this, uuid_t gfid);
+
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name);
+
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode);
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+
+int
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type);
-#endif /* __AFR_SELF_HEAL_H__ */
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies);
+
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on);
+
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks);
+
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
+
+int
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on);
+
+int
+afr_selfheal_recreate_entry (call_frame_t *frame, xlator_t *this, int dst,
+ int source, inode_t *dir, const char *name,
+ inode_t *inode, struct afr_reply *replies);
+
+int
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr);
+
+call_frame_t *
+afr_frame_create (xlator_t *this);
+
+inode_t *
+afr_inode_find (xlator_t *this, uuid_t gfid);
+
+#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index c75b7fa92..4bfe909bc 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1,496 +1,1256 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
+
#include "afr.h"
-#include "syncop.h"
+#include "afr-self-heal.h"
#include "afr-self-heald.h"
-#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
+
+
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break (healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
+
+
+#define NTH_INDEX_HEALER(this, n) &((((afr_private_t *)this->private))->shd.index_healers[n])
+#define NTH_FULL_HEALER(this, n) &((((afr_private_t *)this->private))->shd.full_healers[n])
-static int
-_crawl_directory (loc_t *loc, pid_t pid);
-static int
-get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+int afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p);
+
+char *
+afr_subvol_name (xlator_t *this, int subvol)
{
- char *start = NULL;
- char *end = NULL;
- int ret = -1;
- int i = 0;
+ afr_private_t *priv = NULL;
- if (!pathinfo)
- goto out;
+ priv = this->private;
+ if (subvol < 0 || subvol > priv->child_count)
+ return NULL;
- start = strchr (pathinfo, ':');
- if (!start)
- goto out;
- end = strrchr (pathinfo, ':');
- if (start == end)
- goto out;
+ return priv->children[subvol]->name;
+}
- memset (hostname, 0, size);
- i = 0;
- while (++start != end)
- hostname[i++] = *start;
- ret = 0;
-out:
- return ret;
+
+void
+afr_destroy_crawl_event_data (void *data)
+{
+ return;
+}
+
+
+void
+afr_destroy_shd_event_data (void *data)
+{
+ shd_event_t *shd_event = data;
+
+ if (!shd_event)
+ return;
+ GF_FREE (shd_event->path);
+
+ return;
}
+
+gf_boolean_t
+afr_shd_is_subvol_local (xlator_t *this, int subvol)
+{
+ char *pathinfo = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {0, };
+
+ priv = this->private;
+
+ loc.inode = this->itable->root;
+ uuid_copy (loc.gfid, loc.inode->gfid);
+
+ ret = syncop_getxattr (priv->children[subvol], &loc, &xattr,
+ GF_XATTR_PATHINFO_KEY);
+ if (ret)
+ return _gf_false;
+ if (!xattr)
+ return _gf_false;
+
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret)
+ return _gf_false;
+
+ afr_local_pathinfo (pathinfo, &is_local);
+
+ gf_log (this->name, GF_LOG_DEBUG, "subvol %s is %slocal",
+ priv->children[subvol]->name, is_local? "" : "not ");
+
+ return is_local;
+}
+
+
int
-afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+__afr_shd_healer_wait (struct subvol_healer *healer)
{
- int ret = 0;
- char pathinfohost[1024] = {0};
- char localhost[1024] = {0};
- xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ struct timespec wait_till = {0, };
+ int ret = 0;
- *local = _gf_false;
- ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
- pathinfo);
- goto out;
- }
+ priv = healer->this->private;
- ret = gethostname (localhost, sizeof (localhost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
- "reason: %s", strerror (errno));
- goto out;
- }
+disabled_loop:
+ wait_till.tv_sec = time (NULL) + 60;
- if (!strcmp (localhost, pathinfohost))
- *local = _gf_true;
-out:
- return ret;
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait (&healer->cond,
+ &healer->mutex,
+ &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!priv->shd.enabled)
+ goto disabled_loop;
+
+ return ret;
}
-inline void
-afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+
+int
+afr_shd_healer_wait (struct subvol_healer *healer)
{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ ret = __afr_shd_healer_wait (healer);
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
}
-static int
-_perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, pid_t pid)
+
+gf_boolean_t
+safe_break (struct subvol_healer *healer)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
- int ret = 0;
- loc_t entry_loc = {0};
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- *offset = entry->d_off;
- if (IS_ENTRY_CWD (entry->d_name) ||
- IS_ENTRY_PARENT (entry->d_name))
- continue;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- parentloc->path, entry->d_name);
- continue;
- }
-
- loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, parentloc,
- entry->d_name, entry->d_stat.ia_gfid);
- if (ret)
- goto out;
+ gf_boolean_t ret = _gf_false;
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path);
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, NULL, &parent);
- //Don't fail the crawl if lookup fails as it
- //could be because of split-brain
- if (ret || (!IA_ISDIR (iatt.ia_type)))
- continue;
- afr_fill_loc_info (&entry_loc, &iatt, &parent);
- ret = _crawl_directory (&entry_loc, pid);
- }
- ret = 0;
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
+
+
+inode_t *
+afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)
+{
+ inode_t *inode = NULL;
+ int ret = 0;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+
+ inode = inode_find (this->itable, gfid);
+ if (inode)
+ goto out;
+
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode)
+ goto out;
+ uuid_copy (loc.gfid, gfid);
+
+ ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ inode = inode_link (loc.inode, NULL, NULL, &iatt);
+ if (inode)
+ inode_lookup (inode);
out:
- if (entry_loc.path)
- loc_wipe (&entry_loc);
- return ret;
+ loc_wipe (&loc);
+ return inode;
}
-static int
-_crawl_directory (loc_t *loc, pid_t pid)
+
+fd_t *
+afr_shd_index_opendir (xlator_t *this, int child)
{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- off_t offset = 0;
- gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};;
- int ret = 0;
- gf_boolean_t free_entries = _gf_false;
+ fd_t *fd = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ loc_t rootloc = {0, };
+ inode_t *inode = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ rootloc.inode = inode_ref (this->itable->root);
+ uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr (subvol, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s",
+ subvol->name, uuid_utoa (index_gfid));
+
+ inode = afr_shd_inode_find (this, subvol, index_gfid);
+ if (!inode)
+ goto out;
+ fd = fd_anonymous (inode);
+out:
+ loc_wipe (&rootloc);
+ if (xattr)
+ dict_unref (xattr);
+ return fd;
+}
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
- priv = this->private;
- GF_ASSERT (loc->inode);
+int
+afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {0, };
+ int ret = 0;
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- fd = fd_create (loc->inode, pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
+ loc.parent = inode_ref (inode);
+ loc.name = name;
- if (!loc->parent) {
- ret = syncop_lookup (this, loc, NULL,
- &iatt, NULL, &parent);
- }
+ ret = syncop_unlink (subvol, &loc);
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", loc->path);
- goto out;
- }
+ loc_wipe (&loc);
+ return ret;
+}
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
- ret = 0;
- free_entries = _gf_true;
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- ret = -1;
- goto out;
- }
- if (list_empty (&entries.list))
- goto out;
+int
+afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent,
+ const char *bname)
+{
+ int ret = -1;
- ret = _perform_self_heal (this, loc, &entries, &offset, pid);
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- }
- if (fd)
- fd_unref (fd);
- ret = 0;
-out:
- if (free_entries)
- gf_dirent_free (&entries);
- return ret;
+ ret = afr_selfheal_name (THIS, parent, bname);
+
+ return ret;
}
int
-afr_find_child_position (xlator_t *this, int child)
+afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid)
{
- afr_private_t *priv = NULL;
- dict_t *xattr_rsp = NULL;
- loc_t loc = {0};
- int ret = 0;
- gf_boolean_t local = _gf_false;
- char *pathinfo = NULL;
- afr_child_pos_t *pos = NULL;
- inode_table_t *itable = NULL;
+ int ret = 0;
+ eh_t *eh = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ this = healer->this;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = &healer->crawl_event;
+
+ subvol = priv->children[child];
+
+ ret = afr_selfheal (this, gfid);
+
+ if (ret == -EIO) {
+ eh = shd->split_brain;
+ crawl_event->split_brain_count++;
+ } else if (ret < 0) {
+ eh = shd->heal_failed;
+ crawl_event->heal_failed_count++;
+ } else if (ret == 0) {
+ eh = shd->healed;
+ crawl_event->healed_count++;
+ }
+
+ afr_shd_gfid_to_path (this, subvol, gfid, &path);
+ if (!path)
+ return ret;
+
+ if (eh) {
+ shd_event = GF_CALLOC (1, sizeof(*shd_event),
+ gf_afr_mt_shd_event_t);
+ if (!shd_event) {
+ GF_FREE (path);
+ return ret;
+ }
+
+ shd_event->child = child;
+ shd_event->path = path;
+
+ if (eh_save_history (eh, shd_event) < 0) {
+ GF_FREE (shd_event);
+ GF_FREE (path);
+ }
+ }
+ return ret;
+}
- priv = this->private;
- pos = &priv->shd.pos[child];
- if (*pos != AFR_POS_UNKNOWN) {
- goto out;
- }
+void
+afr_shd_sweep_prepare (struct subvol_healer *healer)
+{
+ crawl_event_t *event = NULL;
- //TODO: Hack to make the root_loc hack work
- LOCK (&priv->lock);
- {
- if (!priv->root_inode) {
- itable = inode_table_new (0, this);
- if (!itable)
- goto unlock;
- priv->root_inode = inode_new (itable);
- if (!priv->root_inode)
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&priv->lock);
+ event = &healer->crawl_event;
- if (!priv->root_inode) {
- ret = -1;
- goto out;
- }
- afr_build_root_loc (priv->root_inode, &loc);
+ event->healed_count = 0;
+ event->split_brain_count = 0;
+ event->heal_failed_count = 0;
- ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
- GF_XATTR_PATHINFO_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child "
- "%d", child);
- goto out;
- }
+ time (&event->start_time);
+ event->end_time = 0;
+}
- ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on "
- "child %d", child);
- goto out;
- }
- ret = afr_local_pathinfo (pathinfo, &local);
- if (ret)
- goto out;
- if (local)
- *pos = AFR_POS_LOCAL;
- else
- *pos = AFR_POS_REMOTE;
+void
+afr_shd_sweep_done (struct subvol_healer *healer)
+{
+ crawl_event_t *event = NULL;
+ crawl_event_t *history = NULL;
+ afr_self_heald_t *shd = NULL;
- gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos);
-out:
- return ret;
+ event = &healer->crawl_event;
+ shd = &(((afr_private_t *)healer->this->private)->shd);
+
+ time (&event->end_time);
+ history = memdup (event, sizeof (*event));
+ event->start_time = 0;
+
+ if (!history)
+ return;
+
+ if (eh_save_history (shd->statistics[healer->subvol], history) < 0)
+ GF_FREE (history);
}
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+
+int
+afr_shd_index_sweep (struct subvol_healer *healer)
{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
- return 0;
+ xlator_t *this = NULL;
+ int child = -1;
+ fd_t *fd = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ uuid_t gfid;
+ int ret = 0;
+ int count = 0;
+
+ this = healer->this;
+ child = healer->subvol;
+ priv = this->private;
+ subvol = priv->children[child];
+
+ fd = afr_shd_index_opendir (this, child);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unable to opendir index-dir on %s", subvol->name);
+ return -errno;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!priv->shd.enabled) {
+ ret = -EBUSY;
+ break;
+ }
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG, "got entry: %s",
+ entry->d_name);
+
+ ret = uuid_parse (entry->d_name, gfid);
+ if (ret)
+ continue;
+
+ ret = afr_shd_selfheal (healer, child, gfid);
+ if (ret == 0)
+ count++;
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ afr_shd_index_purge (subvol, fd->inode,
+ entry->d_name);
+ ret = 0;
+ }
+ }
+
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
+
+ if (fd)
+ fd_unref (fd);
+ if (!ret)
+ ret = count;
+ return ret;
}
-static int
-afr_find_all_children_postions (xlator_t *this)
+
+int
+afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode)
{
- int ret = -1;
- int i = 0;
- gf_boolean_t succeeded = _gf_false;
- afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ xlator_t *this = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+
+ this = healer->this;
+ priv = this->private;
+ subvol = priv->children[healer->subvol];
+
+ fd = fd_anonymous (inode);
+ if (!fd)
+ return -errno;
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (subvol, fd, 131072, offset, 0, &entries))) {
+ if (ret < 0)
+ break;
+
+ ret = gf_link_inodes_from_dirent (this, fd->inode, &entries);
+ if (ret)
+ break;
+
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!priv->shd.enabled) {
+ ret = -EBUSY;
+ break;
+ }
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ afr_shd_selfheal_name (healer, healer->subvol,
+ inode->gfid, entry->d_name);
+
+ afr_shd_selfheal (healer, healer->subvol,
+ entry->d_stat.ia_gfid);
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ ret = afr_shd_full_sweep (healer, entry->inode);
+ if (ret)
+ break;
+ }
+ }
+
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+}
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] != 1)
- continue;
- ret = afr_find_child_position (this, i);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to determine if the "
- "child %s is local.",
- priv->children[i]->name);
- continue;
- }
- succeeded = _gf_true;
- }
- if (succeeded)
- ret = 0;
- return ret;
+
+void *
+afr_shd_index_healer (void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ afr_shd_healer_wait (healer);
+
+ ASSERT_LOCAL(this, healer);
+
+ do {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "starting index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ ret = afr_shd_index_sweep (healer);
+
+ afr_shd_sweep_done (healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finished index sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep (1);
+ } while (ret > 0);
+ }
+
+ return NULL;
}
-static gf_boolean_t
-afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count)
+
+void *
+afr_shd_full_healer (void *data)
{
- int i = 0;
- gf_boolean_t local = _gf_false;
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
- for (i = 0; i < child_count; i++, pos++) {
- if (*pos == AFR_POS_LOCAL) {
- local = _gf_true;
- break;
- }
- }
- return local;
+ healer = data;
+ THIS = this = healer->this;
+
+ for (;;) {
+ pthread_mutex_lock (&healer->mutex);
+ {
+ run = __afr_shd_healer_wait (healer);
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock (&healer->mutex);
+
+ if (!run)
+ break;
+
+ ASSERT_LOCAL(this, healer);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+
+ afr_shd_sweep_prepare (healer);
+
+ afr_shd_full_sweep (healer, this->itable->root);
+
+ afr_shd_sweep_done (healer);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name (this, healer->subvol));
+ }
+
+ return NULL;
}
+
int
-afr_init_child_position (xlator_t *this, int child)
+afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer)
{
- int ret = 0;
+ int ret = 0;
- if (child == AFR_ALL_CHILDREN) {
- ret = afr_find_all_children_postions (this);
- } else {
- ret = afr_find_child_position (this, child);
- }
- return ret;
+ ret = pthread_mutex_init (&healer->mutex, NULL);
+ if (ret)
+ goto out;
+
+ ret = pthread_cond_init (&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
+out:
+ return ret;
}
+
int
-afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
+afr_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
{
- gf_boolean_t local = _gf_false;
+ int ret = 0;
+
+ pthread_mutex_lock (&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal (&healer->cond);
+ } else {
+ ret = gf_thread_create (&healer->thread, NULL,
+ threadfn, healer);
+ if (ret)
+ goto unlock;
+ healer->running = 1;
+ }
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock (&healer->mutex);
+
+ return ret;
+}
- if (child == AFR_ALL_CHILDREN)
- local = afr_local_child_exists (shd->pos, child_count);
- else
- local = (shd->pos[child] == AFR_POS_LOCAL);
- return local;
+int
+afr_shd_full_healer_spawn (xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol),
+ afr_shd_full_healer);
}
-static int
-afr_crawl_directory (xlator_t *this, pid_t pid)
+
+int
+afr_shd_index_healer_spawn (xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol),
+ afr_shd_index_healer);
+}
+
+
+int
+afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- loc_t loc = {0};
- gf_boolean_t crawl = _gf_false;
int ret = 0;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ if (!crawl_event->start_time)
+ goto out;
+
+ start_time_str = gf_strdup (ctime (&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup (ctime (&crawl_event->end_time));
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
- priv = this->private;
- shd = &priv->shd;
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
- LOCK (&priv->lock);
- {
- if (shd->inprogress) {
- shd->pending = _gf_true;
- } else {
- shd->inprogress = _gf_true;
- crawl = _gf_true;
- }
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_healed_count to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_split_brain_count to outout");
+ goto out;
}
- UNLOCK (&priv->lock);
- if (!priv->root_inode) {
- ret = -1;
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_str (output, key, crawl_type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_type to output");
goto out;
}
- if (!crawl)
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_healed_failed_count to outout");
goto out;
+ }
- afr_build_root_loc (priv->root_inode, &loc);
- while (crawl) {
- ret = _crawl_directory (&loc, pid);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed");
- else
- gf_log (this->name, GF_LOG_INFO, "Crawl completed");
- LOCK (&priv->lock);
- {
- if (shd->pending) {
- shd->pending = _gf_false;
- } else {
- shd->inprogress = _gf_false;
- crawl = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, start_time_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_start_time to outout");
+ goto out;
+ } else {
+ start_time_str = NULL;
+ }
+
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ if (!end_time_str)
+ end_time_str = gf_strdup ("Could not determine the end time");
+ ret = dict_set_dynstr (output, key, end_time_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_crawl_end_time to outout");
+ goto out;
+ } else {
+ end_time_str = NULL;
+ }
+
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not add statistics_inprogress to outout");
+ goto out;
}
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not increment the counter.");
+ goto out;
+ }
out:
+ GF_FREE (start_time_str);
+ GF_FREE (end_time_str);
return ret;
}
-static int
-afr_crawl (void *data)
+
+int
+afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv)
{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int ret = -1;
- afr_crawl_data_t *crawl_data = data;
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
- ret = afr_init_child_position (this, crawl_data->child);
- if (ret)
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ ret = dict_set_dynstr (output, key, path);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ path);
goto out;
+ }
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
+ if (tv) {
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+ child, count);
+ ret = dict_set_uint32 (output, key, tv->tv_sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
+ path);
+ goto out;
+ }
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
goto out;
+ }
- ret = afr_crawl_directory (this, crawl_data->pid);
+ ret = 0;
out:
return ret;
}
-void
-afr_proactive_self_heal (xlator_t *this, int idx)
+
+int
+afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- call_frame_t *frame = NULL;
- afr_crawl_data_t *crawl_data = NULL;
- int ret = 0;
+ loc_t loc = {0,};
+ char *path = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ uuid_copy (loc.gfid, gfid);
+ loc.inode = inode_new (this->itable);
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, GFID_TO_PATH_KEY);
+ loc_wipe (&loc);
+ if (ret)
+ return ret;
+
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret || !path)
+ return -EINVAL;
+
+ *path_p = gf_strdup (path);
+ if (!*path_p)
+ return -ENOMEM;
+ return 0;
+}
- priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled)
- goto out;
- if ((idx != AFR_ALL_CHILDREN) &&
- (shd->pos[idx] == AFR_POS_REMOTE))
- goto out;
+int
+afr_shd_gather_index_entries (xlator_t *this, int child, dict_t *output)
+{
+ fd_t *fd = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ uuid_t gfid;
+ int ret = 0;
+ int count = 0;
+ char *path = NULL;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ fd = afr_shd_index_opendir (this, child);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unable to opendir index-dir on %s", subvol->name);
+ return -errno;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG, "got entry: %s",
+ entry->d_name);
+
+ ret = uuid_parse (entry->d_name, gfid);
+ if (ret)
+ continue;
+
+ path = NULL;
+ ret = afr_shd_gfid_to_path (this, subvol, gfid, &path);
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ afr_shd_index_purge (subvol, fd->inode,
+ entry->d_name);
+ ret = 0;
+ continue;
+ }
+
+ ret = afr_shd_dict_add_path (this, output, child, path,
+ NULL);
+ }
+
+ gf_dirent_free (&entries);
+ if (ret)
+ break;
+ }
+
+ if (fd)
+ fd_unref (fd);
+ if (!ret)
+ ret = count;
+ return ret;
+}
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
- afr_set_lk_owner (frame, this);
- afr_set_low_priority (frame);
- crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_afr_crawl_data_t);
- if (!crawl_data)
- goto out;
- crawl_data->child = idx;
- crawl_data->pid = frame->root->pid;
- gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx);
- ret = synctask_new (this->ctx->env, afr_crawl,
- afr_crawl_done, frame, crawl_data);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create the "
- "task for %d ret %d", idx, ret);
-out:
- return;
+int
+afr_add_shd_event (circular_buffer_t *cb, void *data)
+{
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ shd_event = cb->data;
+
+ if (!shd->index_healers[shd_event->child].local)
+ return 0;
+
+ path = gf_strdup (shd_event->path);
+ if (!path)
+ return -ENOMEM;
+
+ afr_shd_dict_add_path (this, output, shd_event->child, path,
+ &cb->tv);
+ return 0;
}
-//TODO: This is a hack
-void
-afr_build_root_loc (inode_t *inode, loc_t *loc)
+int
+afr_add_crawl_event (circular_buffer_t *cb, void *data)
{
- loc->path = "/";
- loc->name = "";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ crawl_event_t *crawl_event = NULL;
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = cb->data;
+
+ if (!shd->index_healers[crawl_event->child].local)
+ return 0;
+
+ afr_shd_dict_add_crawl_event (this, output, crawl_event);
+
+ return 0;
}
+
int
-afr_set_root_gfid (dict_t *dict)
+afr_selfheal_daemon_init (xlator_t *this)
{
- uuid_t gfid;
- int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+
+ shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init (this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->healed)
+ goto out;
+
+ shd->heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->heal_failed)
+ goto out;
+
+ shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count ; i++) {
+ shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
- memset (gfid, 0, 16);
- gfid[15] = 1;
+ ret = 0;
+out:
+ return ret;
+}
- ret = afr_set_dict_gfid (dict, gfid);
- return ret;
+int
+afr_selfheal_childup (xlator_t *this, int subvol)
+{
+ afr_shd_index_healer_spawn (this, subvol);
+
+ return 0;
}
+
+int64_t
+afr_shd_get_index_count (xlator_t *this, int i)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t count = 0;
+ loc_t rootloc = {0, };
+ dict_t *xattr = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ subvol = priv->children[i];
+
+ rootloc.inode = inode_ref (this->itable->root);
+ uuid_copy (rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr (subvol, &rootloc, &xattr,
+ GF_XATTROP_INDEX_COUNT);
+ loc_wipe (&rootloc);
+
+ if (ret < 0)
+ return -1;
+
+ ret = dict_get_uint64 (xattr, GF_XATTROP_INDEX_COUNT, &count);
+ if (ret)
+ return -1;
+ return count;
+}
+
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int op_ret = 0;
+ int64_t cnt = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == -1)
+ goto out;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_index_healer_spawn (this, i);
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else if (AFR_COUNT (priv->child_up,
+ priv->child_count) < 2) {
+ ret = dict_set_str (output, key,
+ "< 2 bricks in replica are up");
+ } else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
+ ret = dict_set_str (output, key,
+ "Brick is remote");
+ } else {
+ ret = dict_set_str (output, key,
+ "Started self-heal");
+ afr_shd_full_healer_spawn (this, i);
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ for (i = 0; i < priv->child_count; i++)
+ if (shd->index_healers[i].local)
+ afr_shd_gather_index_entries (this, i, output);
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ eh_dump (shd->healed, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ eh_dump (shd->heal_failed, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ eh_dump (shd->split_brain, output, afr_add_shd_event);
+ break;
+ case GF_AFR_OP_STATISTICS:
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump (shd->statistics[i], output,
+ afr_add_crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->index_healers[i].crawl_event);
+ afr_shd_dict_add_crawl_event (this, output,
+ &shd->full_healers[i].crawl_event);
+ }
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ snprintf (key, 64, "%d-%d-status", xl_id, i);
+ ret = dict_set_str (output, key,
+ "Brick is not connected");
+ } else {
+ snprintf (key, 64, "%d-%d-hardlinks", xl_id, i);
+ cnt = afr_shd_get_index_count (this, i);
+ if (cnt >= 0) {
+ ret = dict_set_uint64 (output, key, cnt);
+ }
+ op_ret = 0;
+ }
+ }
+
+// ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+// STATISTICS_TO_BE_HEALED,
+// output);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return op_ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 6eb119b07..10e229ee7 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -1,43 +1,72 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEALD_H__
-#define __AFR_SELF_HEALD_H__
-#include "xlator.h"
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-#define AFR_ALL_CHILDREN -1
+#ifndef _AFR_SELF_HEALD_H
+#define _AFR_SELF_HEALD_H
+
+#include <pthread.h>
+
+
+typedef struct {
+ int child;
+ char *path;
+} shd_event_t;
+
+typedef struct {
+ int child;
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+
+ /* If start_time is 0, it means crawler is not in progress
+ and stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means
+ cralwer is in progress */
+ time_t end_time;
+ char *crawl_type;
+} crawl_event_t;
+
+struct subvol_healer {
+ xlator_t *this;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+ crawl_event_t crawl_event;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+};
+
+typedef struct {
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
-typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
-} afr_crawl_data_t;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
+ eh_t **statistics;
+} afr_self_heald_t;
-void afr_proactive_self_heal (xlator_t *this, int idx);
-void afr_build_root_loc (inode_t *inode, loc_t *loc);
+int
+afr_selfheal_childup (xlator_t *this, int subvol);
-int afr_set_root_gfid (dict_t *dict);
+int
+afr_selfheal_daemon_init (xlator_t *this);
-inline void
-afr_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent);
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
-#endif /* __AFR_SELF_HEALD_H__ */
+#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d3960dcff..205ff759e 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,186 +1,173 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
#include <signal.h>
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this);
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
- of RENAME */
-#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- ret = fd_ctx_get (fd, this, &ctx);
+int
+__afr_txn_write_fop (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- if (ret < 0)
- goto out;
+ local = frame->local;
+ priv = this->private;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
-out:
- return fd_ctx;
-}
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+ local->call_count = call_count;
-static void
-afr_pid_save (call_frame_t *frame)
-{
- afr_local_t * local = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ local->transaction.wind (frame, this, i);
- local = frame->local;
+ if (!--call_count)
+ break;
+ }
+ }
- local->saved_pid = frame->root->pid;
+ return 0;
}
-static void
-afr_pid_restore (call_frame_t *frame)
+int
+__afr_txn_write_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
-}
-
+ local->transaction.unwind (frame, this);
-static void
-__mark_all_pending (int32_t *pending[], int child_count,
- afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
+ AFR_STACK_DESTROY (frame);
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
- }
+ return 0;
}
-static void
-__mark_child_dead (int32_t *pending[], int child_count, int child,
- afr_transaction_type type)
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
{
- int j = 0;
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
- j = afr_index_for_transaction_type (type);
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- pending[child][j] = 0;
+ return fop_frame;
}
static void
-__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_save_lk_owner (call_frame_t *frame)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t * local = NULL;
local = frame->local;
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]++;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_restore_lk_owner (call_frame_t *frame)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t * local = NULL;
local = frame->local;
- if (!local->fd)
- return;
-
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- if (!fd_ctx)
- goto out;
-
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
-out:
- return;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
-static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ local = frame->local;
+ priv = this->private;
- if (!child_up[i])
- pending[i][j] = 0;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
}
-static void
-__mark_all_success (int32_t *pending[], int child_count,
- afr_transaction_type type)
+int
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
+ fd_t *fd = NULL;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
- }
+ local = frame->local;
+ fd = local->fd;
+
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update (frame, this);
+
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ local->transaction.fop (frame, this);
+
+ return 0;
}
@@ -247,63 +234,30 @@ __fop_changelog_needed (call_frame_t *frame, xlator_t *this)
}
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int **pending)
{
int i = 0;
int ret = 0;
+ int pending_zero[AFR_NUM_CHANGE_LOGS] = {0, };
for (i = 0; i < priv->child_count; i++) {
+ if (!memcmp (pending_zero, pending[i], sizeof (pending_zero)))
+ /* don't set xattrs for non-pending servers */
+ continue;
+
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int));
/* 3 = data+metadata+entry */
- if (ret < 0)
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-static int
-afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- afr_transaction_type type)
-{
- int i = 0;
- int ret = 0;
- int *arr = NULL;
- int index = 0;
- size_t pending_xattr_size = 3 * sizeof (int32_t);
- /* 3 = data+metadata+entry */
-
- index = afr_index_for_transaction_type (type);
-
- for (i = 0; i < priv->child_count; i++) {
- arr = GF_CALLOC (1, pending_xattr_size,
- gf_afr_mt_char);
- if (!arr) {
- ret = -1;
- goto out;
- }
-
- memcpy (arr, pending[i], pending_xattr_size);
-
- arr[index] = hton32 (ntoh32(arr[index]) + 1);
-
- ret = dict_set_bin (xattr, priv->pending_key[i],
- arr, pending_xattr_size);
-
- if (ret < 0)
- goto out;
+ if (ret)
+ break;
}
-out:
return ret;
}
-
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
@@ -329,410 +283,532 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
/* {{{ pending */
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+
+int
+afr_changelog_post_op_done (call_frame_t *frame, xlator_t *this)
{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int call_count = -1;
- priv = this->private;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
int_lock = &local->internal_lock;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
- }
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
- return 0;
+ return 0;
}
-void
-afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
- inode_t *inode, afr_transaction_type type)
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
{
- int i = -1;
- int count = 0;
- int read_child = -1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int **pending = NULL;
- int idx = 0;
- int32_t *stale_children = NULL;
- int32_t *fresh_children = NULL;
- gf_boolean_t rm_stale_children = _gf_false;
-
- idx = afr_index_for_transaction_type (type);
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- pending = local->pending;
-
- stale_children = afr_children_create (priv->child_count);
- if (!stale_children)
- goto out;
-
- fresh_children = local->fresh_children;
- read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
-
- GF_ASSERT (read_child >= 0);
-
- if (pending[read_child][idx] == 0)
- read_child = -1;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_is_child_present (fresh_children,
- priv->child_count, i))
- continue;
- if (pending[i][idx] == 0) {
- /* child is down or op failed on it */
- rm_stale_children = _gf_true;
- afr_children_rm_child (fresh_children, i,
- priv->child_count);
- stale_children[count++] = i;
- }
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
}
-
- if (!rm_stale_children) {
- GF_ASSERT (read_child >= 0);
- goto out;
- }
-
- if (fresh_children[0] == -1) {
- //All children failed. leave as-is
- goto out;
- }
-
- if (read_child == -1)
- read_child = fresh_children[0];
- afr_inode_rm_stale_children (this, inode, read_child, stale_children);
-out:
- if (stale_children)
- GF_FREE (stale_children);
- return;
+ return NULL;
}
unsigned char*
afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
{
unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
switch (type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
break;
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- locked_nodes = int_lock->entry_locked_nodes;
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
break;
}
return locked_nodes;
}
+
int
-afr_changelog_pre_op_call_count (afr_transaction_type type,
- afr_internal_lock_t *int_lock,
- unsigned int child_count)
+afr_changelog_call_count (afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned int child_count)
{
- int call_count = 0;
- unsigned char *locked_nodes = NULL;
+ int call_count = 0;
- locked_nodes = afr_locked_nodes_get (type, int_lock);
- GF_ASSERT (locked_nodes);
+ call_count = AFR_COUNT(pre_op_subvols, child_count);
- call_count = afr_locked_children_count (locked_nodes, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION) {
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
call_count *= 2;
- }
return call_count;
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = this->private;
- afr_internal_lock_t *int_lock = NULL;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- dict_t **xattr = NULL;
- int piggyback = 0;
- int index = 0;
- int nothing_failed = 1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- if (local->fd)
- afr_transaction_rm_stale_children (frame, this,
- local->fd->inode,
- local->transaction.type);
+ local = frame->local;
+ priv = this->private;
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
+ if (local->transaction.failed_subvols[i])
+ return _gf_false;
}
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
- local->call_count = call_count;
+ return _gf_true;
+}
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
- if (call_count == 0) {
- /* no child is up */
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- goto out;
- }
+void
+afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least on subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+ i_errno = local->replies[i].op_errno;
+
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
+ }
+ }
+
+ if (matching_errors)
+ __mark_all_success (frame, this);
+}
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
- }
- index = afr_index_for_transaction_type (local->transaction.type);
- if (local->optimistic_change_log &&
- local->transaction.type != AFR_DATA_TRANSACTION) {
- /* if nothing_failed, then local->pending[..] == {0 .. 0} */
- for (i = 0; i < priv->child_count; i++)
- local->pending[i][index]++;
- }
+int
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ afr_local_t * local = NULL;
+ dict_t *xattr = NULL;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+ local = frame->local;
+ idx = afr_index_for_transaction_type (local->transaction.type);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ nothing_failed = afr_txn_nothing_failed (frame, this);
+ if (afr_changelog_pre_op_uninherit (frame, this))
+ need_undirty = _gf_false;
+ else
+ need_undirty = _gf_true;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- break;
- }
+ if (nothing_failed && !need_undirty) {
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ xattr = dict_new ();
+ if (!xattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ if (need_undirty) {
+ local->dirty[idx] = hton32(-1);
+
+ ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ }
+
+ if (!nothing_failed) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict (priv, xattr, local->pending);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
+ }
+
+ afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done);
+out:
+ if (xattr)
+ dict_unref (xattr);
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
+ return 0;
+}
- if (piggyback && !nothing_failed)
- ret = afr_set_piggyback_dict (priv, xattr[i],
- local->pending,
- local->transaction.type);
- if (nothing_failed && piggyback) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- __mark_pre_op_undone_on_fd (frame, this, i);
- STACK_WIND_COOKIE (frame,
- afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+gf_boolean_t
+afr_changelog_pre_op_uninherit (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+
+ type = afr_index_for_transaction_type (local->transaction.type);
+ if (type != AFR_DATA_TRANSACTION)
+ return !local->transaction.dirtied;
+
+ if (!fd)
+ return !local->transaction.dirtied;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
+
+ if (local->transaction.no_uninherit)
+ return _gf_false;
+
+ /* This function must be idempotent. So check if we
+ were called before and return the same answer again.
+
+ It is important to keep this function idempotent for
+ the call in afr_changelog_post_op_safe() to not have
+ side effects on the call from afr_changelog_post_op_now()
+ */
+ if (local->transaction.uninherit_done)
+ return local->transaction.uninherit_value;
+
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] !=
+ fd_ctx->pre_op_done[type][i]) {
+ ret = !local->transaction.dirtied;
+ goto unlock;
+ }
+ }
+
+ if (fd_ctx->inherited[type]) {
+ ret = _gf_true;
+ fd_ctx->inherited[type]--;
+ } else if (fd_ctx->on_disk[type]) {
+ ret = _gf_false;
+ fd_ctx->on_disk[type]--;
+ } else {
+ /* ASSERT */
+ ret = _gf_false;
+ }
+
+ if (!fd_ctx->inherited[type] && !fd_ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->pre_op_done[type][i] = 0;
+ }
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+ local->transaction.uninherit_done = _gf_true;
+ local->transaction.uninherit_value = ret;
+
+ return ret;
+}
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- call_count--;
- }
+gf_boolean_t
+afr_changelog_pre_op_inherit (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return _gf_false;
+
+ type = afr_index_for_transaction_type (local->transaction.type);
+
+ if (!fd)
+ return _gf_false;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
+
+ LOCK(&fd->lock);
+ {
+ if (!fd_ctx->on_disk[type]) {
+ /* nothing to inherit yet */
+ ret = _gf_false;
+ goto unlock;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] !=
+ fd_ctx->pre_op_done[type][i]) {
+ /* either inherit exactly, or don't */
+ ret = _gf_false;
+ goto unlock;
+ }
+ }
+
+ fd_ctx->inherited[type]++;
+
+ ret = _gf_true;
+
+ local->transaction.inherited = _gf_true;
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+ return ret;
+}
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
+gf_boolean_t
+afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+
+ if (!fd)
+ return _gf_false;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_false;
+
+ if (local->transaction.inherited)
+ /* was already inherited in afr_changelog_pre_op */
+ return _gf_false;
+
+ if (!local->transaction.dirtied)
+ return _gf_false;
+
+ if (!afr_txn_nothing_failed (frame, this))
+ return _gf_false;
+
+ type = afr_index_for_transaction_type (local->transaction.type);
+
+ ret = _gf_false;
+
+ LOCK(&fd->lock);
+ {
+ if (!fd_ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->pre_op_done[type][i] =
+ local->transaction.pre_op[i];
+ } else {
+ for (i = 0; i < priv->child_count; i++)
+ if (fd_ctx->pre_op_done[type][i] !=
+ local->transaction.pre_op[i]) {
+ local->transaction.no_uninherit = 1;
+ goto unlock;
+ }
+ }
+ fd_ctx->on_disk[type]++;
+
+ ret = _gf_true;
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+ return ret;
+}
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
- /* fall through */
+int
+afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
- case AFR_ENTRY_TRANSACTION:
- {
- if (nothing_failed) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ local = frame->local;
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
+ if (op_ret == -1)
+ afr_transaction_fop_failed (frame, this, (long) cookie);
- if (!--call_count)
- break;
- }
+ call_count = afr_frame_return (frame);
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
+ if (call_count == 0)
+ local->transaction.changelog_resume (frame, this);
return 0;
}
-int32_t
-afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+int
+afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- switch (op_ret) {
- case 0:
- __mark_pre_op_done_on_fd (frame, this, child_index);
- //fallthrough we need to mark the pre_op
- case 1:
- local->transaction.pre_op[child_index] = 1;
- /* special op_ret for piggyback */
- break;
- case -1:
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
+ call_count = afr_changelog_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ changelog_resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->transaction.changelog_resume = changelog_resume;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
}
- local->op_errno = op_errno;
- break;
- }
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ call_count--;
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ /* fall through */
- afr_pid_restore (frame);
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame, afr_changelog_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr,
+ NULL);
+ break;
+ }
- local->transaction.fop (frame, this);
- }
+ if (!--call_count)
+ break;
}
- return 0;
+ return 0;
}
+
int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
@@ -740,193 +816,122 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
int i = 0;
int ret = 0;
int call_count = 0;
- dict_t **xattr = NULL;
- afr_fd_ctx_t *fdctx = NULL;
+ int op_errno = 0;
afr_local_t *local = NULL;
- int piggyback = 0;
afr_internal_lock_t *int_lock = NULL;
unsigned char *locked_nodes = NULL;
+ unsigned char *pending_subvols = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
local = frame->local;
int_lock = &local->internal_lock;
-
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
-
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
- }
-
- call_count = afr_changelog_pre_op_call_count (local->transaction.type,
- int_lock,
- priv->child_count);
- if (call_count == 0) {
- local->internal_lock.lock_cbk =
- local->transaction.done;
- afr_unlock (frame, this);
- goto out;
- }
-
- local->call_count = call_count;
-
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
-
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ idx = afr_index_for_transaction_type (local->transaction.type);
locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_nodes[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- break;
- }
+ pending_subvols = alloca0 (priv->child_count);
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_done[i]) {
- fdctx->pre_op_piggyback[i]++;
- piggyback = 1;
- fdctx->hit++;
- } else {
- fdctx->miss++;
- }
- }
- UNLOCK (&local->fd->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++;
+ } else {
+ pending_subvols[i] = 1;
+ }
+ }
- if (piggyback)
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ /* TBD: quorum check w/ call_count */
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- } else {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
-
- call_count--;
- }
-
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
- /* fall through */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ pre_nop = _gf_true;
+
+ if (afr_changelog_pre_op_inherit (frame, this))
+ goto next;
+
+ if (call_count < priv->child_count) {
+ /* For subvols we are not performing operation on,
+ mark them as pending up-front along with the FOP
+ so that we can safely defer unmarking dirty until
+ later.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict (priv, xdata_req,
+ local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ pre_nop = _gf_false;
+ }
+
+ if (call_count > 1 &&
+ (local->transaction.type == AFR_DATA_TRANSACTION ||
+ !local->optimistic_change_log)) {
+
+ /* If we are performing change on only one subvol, no
+ need to mark dirty, because we are setting the pending
+ counts already anyways
+ */
+ local->dirty[idx] = hton32(1);
+
+ ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1;
+ }
+
+ if (pre_nop)
+ goto next;
+
+ if (!local->pre_op_compat) {
+ dict_copy (xdata_req, local->xdata_req);
+ goto next;
+ }
+
+ afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
+
+ return 0;
+next:
+ afr_transaction_perform_fop (frame, this);
+
+ if (xdata_req)
+ dict_unref (xdata_req);
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i]);
- break;
- }
+ return 0;
+err:
+ local->internal_lock.lock_cbk = local->transaction.done;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
+ afr_unlock (frame, this);
- if (!--call_count)
- break;
- }
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
+ if (xdata_req)
+ dict_unref (xdata_req);
- return 0;
+ return 0;
}
@@ -1075,12 +1080,14 @@ int
afr_set_transaction_flock (afr_local_t *local)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
return 0;
}
@@ -1095,6 +1102,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
@@ -1108,8 +1116,8 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -1131,12 +1139,6 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int
afr_lock (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
-
- frame->root->pid = (long) frame->root;
-
- afr_set_lk_owner (frame, this);
-
afr_set_lock_number (frame, this);
return afr_lock_rec (frame, this);
@@ -1148,47 +1150,427 @@ afr_lock (call_frame_t *frame, xlator_t *this)
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
if (__fop_changelog_needed (frame, this)) {
afr_changelog_pre_op (frame, this);
} else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
+
+ - changelog piggybacking
+ - eager locking
+ */
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local->transaction.eager_lock_on)
+ return;
+
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+gf_boolean_t
+afr_are_multiple_fds_opened (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ if (!fd) {
+ /* If false is returned, it may keep on taking eager-lock
+ * which may lead to starvation, so return true to avoid that.
+ */
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid fd");
+ return _gf_true;
+ }
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1
+ */
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return _gf_true;
+
+ if (fd_ctx->open_fd_count > 1)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (!local->delayed_post_op)
+ goto out;
+
+ //Mark pending changelog ASAP
+ if (!afr_txn_nothing_failed (frame, this))
+ goto out;
+
+ if (local->fd && afr_are_multiple_fds_opened (local->fd, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+ fd_t *fd = NULL;
+
+ fd = data;
+
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
+
+
+/* SET operation */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+
+ fdctx = afr_fd_ctx_get (fd, this);
+
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&fd->lock);
+
+ return 0;
+}
+
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
+
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ return _gf_true;
+
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return witness;
+}
- afr_pid_restore (frame);
- local->transaction.fop (frame, this);
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret != 0) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ if (!--call_count)
+ break;
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ if (afr_changelog_pre_op_uninherit (frame, this) &&
+ afr_txn_nothing_failed (frame, this)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync (frame, this);
+ } else {
+ afr_changelog_post_op_now (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
+ {
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
+ }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
+
+out:
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_now (prev_frame, this);
+ } else if (stub) {
+ call_resume (stub);
+ }
+}
+
+
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
+}
+
+
+
+/* Wake up the sleeping/delayed post-op, and also register
+ a stub to have it resumed after this transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+}
+
+
+int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ afr_remove_eager_lock_stub (local);
+ }
+
+ afr_restore_lk_owner (frame);
+
+ afr_handle_symmetric_errors (frame, this);
+
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update (frame, this);
if (__fop_changelog_needed (frame, this)) {
afr_changelog_post_op (frame, this);
} else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
+ afr_changelog_post_op_done (frame, this);
}
return 0;
@@ -1200,16 +1582,96 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
local = frame->local;
- priv = this->private;
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ local->transaction.failed_subvols[child_index] = 1;
+}
+
+
+
+ static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
+
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
+
+ priv = this->private;
+
+ if (!local->fd)
+ return;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+
+ if (!priv->eager_lock)
+ return;
+
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
+
+ if (afr_are_multiple_fds_opened (local->fd, this))
+ return;
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
+ }
+unlock:
+ UNLOCK (&local->fd->lock);
}
@@ -1218,20 +1680,43 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
local = frame->local;
priv = this->private;
- afr_transaction_local_init (local, this);
-
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
+
+ afr_transaction_eager_lock_init (local, this);
+
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
+
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
+
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
if (afr_lock_server_count (priv, local->transaction.type) == 0) {
afr_internal_lock_finish (frame, this);
} else {
afr_lock (frame, this);
}
-
- return 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 10f274fec..77cc8eed0 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,25 +1,18 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+#include "afr.h"
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
@@ -27,9 +20,34 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
+
+int afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
+
+int afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol);
+
+int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
+int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
+call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
+
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 7791ec86b..5e12910b7 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -37,8 +28,13 @@ notify (xlator_t *this, int32_t event,
void *data, ...)
{
int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- ret = afr_notify (this, event, data);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
return ret;
}
@@ -85,29 +81,42 @@ xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
return index;
}
-
-int
-xlator_subvolume_count (xlator_t *this)
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
{
- int i = 0;
- xlator_list_t *list = NULL;
-
- for (list = this->children; list; list = list->next)
- i++;
- return i;
+ if (priv->quorum_count && strcmp(qtype,"fixed")) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
+ }
+ if (!strcmp(qtype,"none")) {
+ priv->quorum_count = 0;
+ }
+ else if (!strcmp(qtype,"auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
}
-
int
reconfigure (xlator_t *this, dict_t *options)
{
- afr_private_t * priv = NULL;
- xlator_t * read_subvol = NULL;
- int ret = -1;
- int index = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
priv = this->private;
+ GF_OPTION_RECONF ("afr-dirty-xattr",
+ priv->afr_dirty, options, str,
+ out);
+
+ GF_OPTION_RECONF ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool,
+ out);
+
GF_OPTION_RECONF ("background-self-heal-count",
priv->background_self_heal_count, options, uint32,
out);
@@ -121,9 +130,6 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
bool, out);
- GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
- out);
-
GF_OPTION_RECONF ("data-self-heal-window-size",
priv->data_self_heal_window_size, options,
uint32, out);
@@ -140,10 +146,11 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("data-self-heal-algorithm",
priv->data_self_heal_algorithm, options, str, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
-
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
+
if (read_subvol) {
index = xlator_subvolume_index (this, read_subvol);
if (index == -1) {
@@ -154,6 +161,43 @@ reconfigure (xlator_t *this, dict_t *options)
priv->read_child = index;
}
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
+
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, out);
+
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("iam-self-heal-daemon", priv->shd.iamshd, options,
+ bool, out);
+
+ priv->did_discovery = _gf_false;
+
ret = 0;
out:
return ret;
@@ -173,15 +217,16 @@ static const char *favorite_child_warning_str = "You have specified subvolume '%
int32_t
init (xlator_t *this)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t * read_subvol = NULL;
- xlator_t * fav_child = NULL;
-
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
@@ -195,12 +240,25 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+
+ child_count = xlator_subvolume_count (this);
+
+ priv->child_count = child_count;
priv->read_child = -1;
+ GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
if (read_subvol) {
priv->read_child = xlator_subvolume_index (this, read_subvol);
@@ -210,6 +268,18 @@ init (xlator_t *this)
goto out;
}
}
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
+ }
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
+
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
priv->favorite_child = -1;
GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
@@ -242,8 +312,6 @@ init (xlator_t *this)
GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
-
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -258,18 +326,24 @@ init (xlator_t *this)
GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
- GF_OPTION_INIT ("enforce-quorum", priv->enforce_quorum, bool, out);
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
+ fix_quorum_options(this,priv,qtype);
- priv->wait_count = 1;
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
- child_count = xlator_subvolume_count (this);
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
- priv->child_count = child_count;
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
+ priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
@@ -309,8 +383,6 @@ init (xlator_t *this)
AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
ret = -ENOMEM;
goto out;
}
@@ -319,6 +391,13 @@ init (xlator_t *this)
i++;
}
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
gf_afr_mt_int32_t);
if (!priv->last_event) {
@@ -326,20 +405,23 @@ init (xlator_t *this)
goto out;
}
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_afr_brick_pos_t);
- if (!priv->shd.pos) {
- ret = -ENOMEM;
+ ret = afr_selfheal_daemon_init (this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
goto out;
}
- LOCK_INIT (&priv->root_inode_lk);
- priv->first_lookup = 1;
priv->root_inode = NULL;
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
-
ret = 0;
out:
return ret;
@@ -349,6 +431,13 @@ out:
int
fini (xlator_t *this)
{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
return 0;
}
@@ -367,6 +456,9 @@ struct xlator_fops fops = {
.finodelk = afr_finodelk,
.entrylk = afr_entrylk,
.fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
/* inode read */
.access = afr_access,
@@ -374,6 +466,7 @@ struct xlator_fops fops = {
.fstat = afr_fstat,
.readlink = afr_readlink,
.getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
.readv = afr_readv,
/* inode write */
@@ -381,9 +474,11 @@ struct xlator_fops fops = {
.truncate = afr_truncate,
.ftruncate = afr_ftruncate,
.setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
.setattr = afr_setattr,
.fsetattr = afr_fsetattr,
.removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
/* dir read */
.opendir = afr_opendir,
@@ -416,33 +511,79 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{ .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+ { .key = {"read-hash-mode" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 2,
+ .default_value = "1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option"
+ "0 = first up server, "
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume), "
+ "2 = hash by GFID of file and client PID",
+ },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
},
{ .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
},
{ .key = {"background-self-heal-count"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
.default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
},
{ .key = {"data-self-heal"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.value = {"1", "on", "yes", "true", "enable",
"0", "off", "no", "false", "disable",
"open"},
.default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
},
{ .key = {"data-self-heal-algorithm"},
.type = GF_OPTION_TYPE_STR,
- .default_value = "",
.description = "Select between \"full\", \"diff\". The "
"\"full\" algorithm copies the entire file from "
"source to sink. The \"diff\" algorithm copies to "
"sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
},
{ .key = {"data-self-heal-window-size"},
.type = GF_OPTION_TYPE_INT,
@@ -455,46 +596,154 @@ struct volume_options options[] = {
{ .key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
},
{ .key = {"entry-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
},
{ .key = {"data-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"metadata-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"entry-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
},
{ .key = {"optimistic-change-log"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
- },
- { .key = {"strict-readdir"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
},
{ .key = {"inodelk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
},
{ .key = {"entrylk-trace"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
},
{ .key = {"self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ .default_value = "on",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."
},
- { .key = {"enforce-quorum"},
+ { .key = {"iam-self-heal-daemon"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks or present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
},
+ { .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ { .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0677b96e9..36042f7b2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -29,78 +20,42 @@
#include "call-stub.h"
#include "compat-errno.h"
#include "afr-mem-types.h"
-#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
+#include "timer.h"
+#include "syncop.h"
+
+#include "afr-self-heald.h"
#define AFR_XATTR_PREFIX "trusted.afr"
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
+#define AFR_DIRTY (((afr_private_t *) (THIS->private))->afr_dirty)
-struct _pump_private;
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
-typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
-typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
+typedef int (*afr_read_txn_wind_t) (call_frame_t *frame, xlator_t *this, int subvol);
-typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int err);
-typedef enum {
- AFR_POS_UNKNOWN,
- AFR_POS_LOCAL,
- AFR_POS_REMOTE
-} afr_child_pos_t;
+typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
-typedef enum {
- AFR_INODE_SET_READ_CTX = 1,
- AFR_INODE_RM_STALE_CHILDREN,
- AFR_INODE_SET_OPENDIR_DONE,
- AFR_INODE_SET_SPLIT_BRAIN,
- AFR_INODE_GET_READ_CTX,
- AFR_INODE_GET_OPENDIR_DONE,
- AFR_INODE_GET_SPLIT_BRAIN,
-} afr_inode_op_t;
-
-typedef struct afr_inode_params_ {
- afr_inode_op_t op;
- union {
- gf_boolean_t value;
- struct {
- int32_t read_child;
- int32_t *children;
- } read_ctx;
- } u;
-} afr_inode_params_t;
-
-typedef struct afr_inode_ctx_ {
- uint64_t masks;
- int32_t *fresh_children;//increasing order of latency
-} afr_inode_ctx_t;
-
-typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t pending;
- gf_boolean_t inprogress;
- afr_child_pos_t *pos;
-} afr_self_heald_t;
+#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr;})
+#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
+#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
-
xlator_t **children;
- gf_lock_t root_inode_lk;
- int first_lookup;
inode_t *root_inode;
unsigned char *child_up;
@@ -121,147 +76,54 @@ typedef struct _afr_private {
gf_boolean_t metadata_change_log; /* on/off */
gf_boolean_t entry_change_log; /* on/off */
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
-
gf_boolean_t inodelk_trace;
gf_boolean_t entrylk_trace;
- gf_boolean_t strict_readdir;
-
unsigned int wait_count; /* # of servers to wait for success */
uint64_t up_count; /* number of CHILD_UPs we have seen */
uint64_t down_count; /* number of CHILD_DOWNs we have seen */
- struct _pump_private *pump_private; /* Set if we are loaded as pump */
- int use_afr_in_pump;
-
- pthread_mutex_t mutex;
- struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
- gf_boolean_t enforce_quorum;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
- afr_self_heald_t shd;
-} afr_private_t;
-
-typedef struct {
- /* External interface: These are variables (some optional) that
- are set by whoever has triggered self-heal */
-
- gf_boolean_t do_data_self_heal;
- gf_boolean_t do_metadata_self_heal;
- gf_boolean_t do_entry_self_heal;
- gf_boolean_t do_gfid_self_heal;
- gf_boolean_t do_missing_entry_self_heal;
-
- gf_boolean_t forced_merge; /* Is this a self-heal triggered to
- forcibly merge the directories? */
-
- gf_boolean_t background; /* do self-heal in background
- if possible */
- ia_type_t type; /* st_mode of the entry we're doing
- self-heal on */
- inode_t *inode; /* inode on which the self-heal is
- performed on */
- uuid_t sh_gfid_req; /* gfid self-heal needs to be done
- with this gfid if it is not null */
-
- /* Function to call to unwind. If self-heal is being done in the
- background, this function will be called as soon as possible. */
-
- int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno);
-
- /* End of external interface members */
-
-
- /* array of stat's, one for each child */
- struct iatt *buf;
- struct iatt *parentbufs;
- struct iatt parentbuf;
- struct iatt entrybuf;
-
- afr_expunge_done_cbk_t expunge_done;
- afr_impunge_done_cbk_t impunge_done;
- int32_t impunge_ret_child;
-
- /* array of xattr's, one for each child */
- dict_t **xattr;
-
- /* array containing if the lookups succeeded in the order of response
- */
- int32_t *success_children;
- int success_count;
- /* array containing the fresh children found in the self-heal process */
- int32_t *fresh_children;
- /* array containing the fresh children found in the parent lookup */
- int32_t *fresh_parent_dirs;
- /* array of errno's, one for each child */
- int *child_errno;
- /*loc used for lookup*/
- loc_t lookup_loc;
- int32_t lookup_flags;
- afr_lookup_done_cbk_t lookup_done;
-
- int32_t **pending_matrix;
- int32_t **delta_matrix;
- int32_t op_ret;
- int32_t op_errno;
-
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- unsigned char *success;
- unsigned char *locked_nodes;
- int lock_count;
-
- mode_t impunging_entry_mode;
- const char *linkname;
- gf_boolean_t entries_skipped;
-
- int op_failed;
-
- gf_boolean_t data_lock_held;
- gf_boolean_t eof_reached;
- fd_t *healing_fd;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- afr_post_remove_call_t post_remove_call;
-
- loc_t parent_loc;
-
- call_frame_t *orig_frame;
- call_frame_t *old_loop_frame;
- gf_boolean_t unwound;
-
- afr_sh_algo_private_t *private;
-
- afr_lock_cbk_t data_lock_success_handler;
- afr_lock_cbk_t data_lock_failure_handler;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
- int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
- afr_lock_cbk_t loop_completion_cbk;
- int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
+ char *afr_dirty;
+
+ afr_self_heald_t shd;
+
+ /* pump dependencies */
+ void *pump_private;
+ gf_boolean_t use_afr_in_pump;
+} afr_private_t;
- call_frame_t *sh_frame;
-} afr_self_heal_t;
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */
@@ -323,11 +185,31 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_entry_lockee_t;
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
+
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
typedef struct {
loc_t *lk_loc;
- struct gf_flock lk_flock;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
@@ -336,87 +218,203 @@ typedef struct {
unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
selfheal_lk_type_t selfheal_lk_type;
transaction_lk_type_t transaction_lk_type;
int32_t lock_count;
- int32_t inodelk_lock_count;
int32_t entrylk_lock_count;
uint64_t lock_number;
int32_t lk_call_count;
int32_t lk_expected_count;
+ int32_t lk_attempted_count;
int32_t lock_op_ret;
int32_t lock_op_errno;
afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
} afr_internal_lock_t;
-typedef struct _afr_locked_fd {
- fd_t *fd;
- struct list_head list;
-} afr_locked_fd_t;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ uint8_t checksum[MD5_DIGEST_LENGTH];
+};
+
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
+
+ int flags;
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
+} afr_fd_ctx_t;
+
typedef struct _afr_local {
- int uid;
- int gid;
+ glusterfs_fop_t op;
unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
-
- unsigned int govinda_gOvinda;
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- unsigned int read_child_index;
- unsigned char read_child_returned;
- unsigned int first_up_child;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- pid_t saved_pid;
+ gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
int32_t **pending;
+ int dirty[AFR_NUM_CHANGE_LOGS];
+
loc_t loc;
loc_t newloc;
fd_t *fd;
- unsigned char *fd_open_on;
-
- glusterfs_fop_t fop;
+ afr_fd_ctx_t *fd_ctx;
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
unsigned char *child_up;
- int32_t *fresh_children; //in the order of response
- int32_t *child_errno;
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- dict_t *xattr_req;
+ /* @readfn:
- int32_t inodelk_count;
- int32_t entrylk_count;
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- afr_internal_lock_t internal_lock;
+ afr_read_txn_wind_t readfn;
+
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ /* @inode:
+
+ the inode on which the read txn is performed on. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
+
+ inode_t *inode;
+
+ /* @parent[2]:
+
+ parent inode[s] on which directory transactions are performed.
+ */
+
+ inode_t *parent;
+ inode_t *parent2;
+
+ /* @readable:
+
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
+
+ afr_inode_refresh_cbk_t refreshfn;
+
+ /* @refreshinode:
- afr_locked_fd_t *locked_fd;
- int32_t source_child;
- int32_t lock_recovery_child;
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
+
+ /*
+ @pre_op_compat:
+
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
+
+ gf_boolean_t pre_op_compat;
+
+ dict_t *xattr_req;
+
+ afr_internal_lock_t internal_lock;
dict_t *dict;
+
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
- gf_boolean_t fop_paused;
- int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
+
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
/*
This struct contains the arguments for the "continuation"
(scheme-like) of fops
*/
- int op;
struct {
struct {
unsigned char buf_set;
@@ -424,22 +422,7 @@ typedef struct _afr_local {
} statfs;
struct {
- uuid_t gfid_req;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- dict_t **xattrs;
- dict_t *xattr;
- struct iatt *postparents;
- struct iatt *bufs;
- int32_t read_child;
- int32_t *sources;
- int32_t *success_children;
- } lookup;
-
- struct {
int32_t flags;
- int32_t wbflags;
} open;
struct {
@@ -472,13 +455,14 @@ typedef struct _afr_local {
struct {
char *name;
int last_index;
- long pathinfo_len;
+ long xattr_len;
} getxattr;
struct {
size_t size;
off_t offset;
int last_index;
+ uint32_t flags;
} readv;
/* dir read */
@@ -496,7 +480,7 @@ typedef struct _afr_local {
int32_t op_errno;
size_t size;
off_t offset;
-
+ dict_t *dict;
gf_boolean_t failed;
int last_index;
} readdir;
@@ -505,44 +489,34 @@ typedef struct _afr_local {
struct {
struct iatt prebuf;
struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+ struct {
int32_t op_ret;
struct iovec *vector;
struct iobref *iobref;
int32_t count;
off_t offset;
+ uint32_t flags;
} writev;
struct {
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} truncate;
struct {
off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
} ftruncate;
struct {
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} setattr;
struct {
struct iatt in_buf;
int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
} fsetattr;
struct {
@@ -551,96 +525,85 @@ typedef struct _afr_local {
} setxattr;
struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
char *name;
} removexattr;
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
/* dir write */
struct {
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
inode_t *inode;
struct iatt buf;
struct iatt preparent;
struct iatt postparent;
- struct iatt read_child_buf;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
} create;
struct {
dev_t dev;
mode_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
} mknod;
struct {
int32_t mode;
dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
} mkdir;
struct {
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
+ int flags;
} rmdir;
struct {
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- inode_t *inode;
dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
} symlink;
+ struct {
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
+
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
+
struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
+
+
} cont;
struct {
off_t start, len;
+ gf_boolean_t eager_lock_on;
int *eager_lock;
char *basename;
@@ -651,16 +614,67 @@ typedef struct _afr_local {
afr_transaction_type type;
- int success_count;
- int erase_pending;
- int failure_count;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
+
+ struct list_head eager_locked;
- int last_tried;
- int32_t *child_errno;
unsigned char *pre_op;
+ /* @fop_subvols: subvolumes on which FOP will be attempted */
+ unsigned char *fop_subvols;
+
+ /* @failed_subvols: subvolumes on which FOP failed. Always
+ a subset of @fop_subvols */
+ unsigned char *failed_subvols;
+
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
+
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
+
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
+
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
+
+ /* @uninherit_done:
+ @uninherit_value:
+
+ The above pair variables make pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
+
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+
+ afr_changelog_resume_t changelog_resume;
+
call_frame_t *main_frame;
+ int (*wind) (call_frame_t *frame, xlator_t *this, int subvol);
+
int (*fop) (call_frame_t *frame, xlator_t *this);
int (*done) (call_frame_t *frame, xlator_t *this);
@@ -672,99 +686,96 @@ typedef struct _afr_local {
/* post-op hook */
} transaction;
- afr_self_heal_t self_heal;
+ syncbarrier_t barrier;
struct marker_str marker;
-} afr_local_t;
-
-typedef enum {
- AFR_FD_NOT_OPENED,
- AFR_FD_OPENED,
- AFR_FD_OPENING
-} afr_fd_open_status_t;
-
-typedef struct {
- struct list_head call_list;
- call_frame_t *frame;
-} afr_fd_paused_call_t;
-typedef struct {
- unsigned int *pre_op_done;
- afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
- unsigned int *pre_op_piggyback;
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
- unsigned int *lock_piggyback;
- unsigned int *lock_acquired;
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
+} afr_local_t;
- int flags;
- int32_t wbflags;
- uint64_t up_count; /* number of CHILD_UPs this fd has seen */
- uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
- int32_t last_tried;
+/* did a call fail due to a child failing? */
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
+ ((op_errno == ENOTCONN) || \
+ (op_errno == EBADFD)))
- int hit, miss;
- gf_boolean_t failed_over;
- struct list_head entries; /* needed for readdir failover */
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
- unsigned char *locked_on; /* which subvolumes locks have been successful */
- struct list_head paused_calls; /* queued calls while fix_open happens */
-} afr_fd_ctx_t;
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
-/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_afr_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type);
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ int *event_p, afr_transaction_type type);
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+#define afr_data_subvol_get(i, t, s, e) \
+ afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION)
-#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
+#define afr_metadata_subvol_get(i, t, s, e) \
+ afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION)
-/* have we tried all children? */
-#define all_tried(i, count) ((i) == (count) - 1)
+int
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_inode_refresh_cbk_t cbk);
int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
int
-pump_command_reply (call_frame_t *frame, xlator_t *this);
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
-int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, ...);
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
int
-afr_save_locked_fd (xlator_t *this, fd_t *fd);
-
-int
afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
unsigned char *locked_nodes);
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
afr_set_lock_number (call_frame_t *frame, xlator_t *this);
-
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this);
@@ -780,40 +791,30 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this);
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
-int pump_start (call_frame_t *frame, xlator_t *this);
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
-
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children);
-
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
-
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count);
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count);
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode);
void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
-
-int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_replies_wipe (afr_local_t *local, afr_private_t *priv);
void
afr_local_cleanup (afr_local_t *local, xlator_t *this);
@@ -821,21 +822,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
int
afr_frame_return (call_frame_t *frame);
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode);
-
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
-
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
-
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode);
-
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode);
+ fd_t *fd, dict_t *xdata);
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
@@ -843,9 +832,6 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
-int
-afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
-
#define AFR_STACK_UNWIND(fop, frame, params ...) \
do { \
afr_local_t *__local = NULL; \
@@ -856,22 +842,36 @@ afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
frame->local = NULL; \
} \
STACK_UNWIND_STRICT (fop, frame, params); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
- } while (0);
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- GF_FREE (__local); \
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
} while (0);
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({frame->local = mem_get0 (THIS->local_pool); \
+ if (afr_local_init (frame->local, THIS->private, &op_errno)) { \
+ afr_local_cleanup (frame->local, THIS); \
+ mem_put (frame->local); \
+ frame->local = NULL; }; \
+ frame->local;})
+
+#define AFR_STACK_RESET(frame) do { int opr; STACK_RESET (frame->root); AFR_FRAME_INIT(frame, opr);} while (0)
+
/* allocate and return a string that is the basename of argument */
static inline char *
AFR_BASENAME (const char *str)
@@ -884,6 +884,9 @@ AFR_BASENAME (const char *str)
return __basename_str;
}
+call_frame_t *
+afr_copy_frame (call_frame_t *base);
+
int
afr_transaction_local_init (afr_local_t *local, xlator_t *this);
@@ -891,129 +894,83 @@ int32_t
afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
-int32_t *
-afr_children_create (int32_t child_count);
-
int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
transaction_lk_type_t lk_type);
int
-afr_first_up_child (unsigned char *child_up, size_t child_count);
+afr_higher_errno (int32_t old_errno, int32_t new_errno);
int
-afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources);
-
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child);
-
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index);
+afr_final_errno (afr_local_t *local, afr_private_t *priv);
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child);
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *children, unsigned int child_count);
-void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count);
-void
-afr_children_rm_child (int32_t *children, int32_t child,
- int32_t child_count);
-void
-afr_reset_children (int32_t *children, int32_t child_count);
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno);
-int
-afr_get_children_count (int32_t *children, unsigned int child_count);
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child);
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count);
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count);
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name);
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *children,
- struct iatt *bufs, unsigned int child_count,
- const char *path);
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count);
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *stale_children);
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req);
+
void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno));
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open);
-int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
-int
-afr_set_elem_count_get (unsigned char *elems, int child_count);
+afr_fix_open (fd_t *fd, xlator_t *this);
afr_fd_ctx_t *
afr_fd_ctx_get (fd_t *fd, xlator_t *this);
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal);
-
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal);
-
void
afr_set_low_priority (call_frame_t *frame);
int
afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags, int32_t wb_flags);
+ int flags);
gf_boolean_t
afr_have_quorum (char *logname, afr_private_t *priv);
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+void
+afr_filter_xattrs (dict_t *xattr);
+
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
/*
* Having this as a macro will make debugging a bit weirder, but does reduce
* the probability of functions handling this check inconsistently.
*/
#define QUORUM_CHECK(_func,_label) do { \
- if (priv->enforce_quorum && !afr_have_quorum(this->name,priv)) { \
- gf_log(this->name,GF_LOG_WARNING, \
- "failing "#_func" due to lack of quorum"); \
- op_errno = EROFS; \
- goto _label; \
- } \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
} while (0);
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *is_local);
+
+void
+afr_remove_eager_lock_stub (afr_local_t *local);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 007daa6f1..eed509956 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <unistd.h>
#include <sys/time.h>
#include <stdlib.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -28,8 +20,130 @@
#include "afr-common.c"
#include "defaults.c"
+#include "glusterfs.h"
+#include "pump.h"
+
+
+static int
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+ int ret = 0;
+ uuid_t *pgfid = NULL;
+
+ GF_ASSERT (gfid);
+
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ if (ret && pgfid)
+ GF_FREE (pgfid);
+ return ret;
+}
+
+static int
+afr_set_root_gfid (dict_t *dict)
+{
+ uuid_t gfid;
+ int ret = 0;
+
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+
+ ret = afr_set_dict_gfid (dict, gfid);
+
+ return ret;
+}
+
+static int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+ int ret = -1;
+ uuid_t pargfid = {0};
+
+ if (!child)
+ goto out;
+
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
+
+ if (uuid_is_null (pargfid))
+ goto out;
+
+ if (strcmp (parent->path, "/") == 0)
+ ret = gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+ name);
+
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting child path");
+ }
+
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
+
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if ((ret == -1) && child)
+ loc_wipe (child);
+
+ return ret;
+}
+
+static void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->path = gf_strdup ("/");
+ loc->name = "";
+ loc->inode = inode_ref (priv->root_inode);
+ uuid_copy (loc->gfid, loc->inode->gfid);
+}
+
+static void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
+}
static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
static int
pump_mark_start_pending (xlator_t *this)
{
@@ -140,9 +254,7 @@ pump_set_resume_path (xlator_t *this, const char *path)
LOCK (&pump_priv->resume_path_lock);
{
- pump_priv->resume_path = strdup (path);
- if (!pump_priv->resume_path)
- ret = -1;
+ strncpy (pump_priv->resume_path, path, strlen (path) + 1);
}
UNLOCK (&pump_priv->resume_path_lock);
@@ -167,7 +279,7 @@ pump_save_path (xlator_t *this, const char *path)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
@@ -187,6 +299,7 @@ pump_save_path (xlator_t *this, const char *path)
dict_unref (dict);
+ loc_wipe (&loc);
return 0;
}
@@ -362,7 +475,7 @@ gf_pump_traverse_directory (loc_t *loc)
"pump opendir on %s returned=%d",
loc->path, ret);
- while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
free_entries = _gf_true;
if (list_empty (&entries.list)) {
@@ -384,64 +497,81 @@ gf_pump_traverse_directory (loc_t *loc)
}
loc_wipe (&entry_loc);
ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name,
- entry->d_stat.ia_gfid);
+ entry->d_name);
if (ret)
goto out;
- if (!IS_ENTRY_CWD (entry->d_name) &&
- !IS_ENTRY_PARENT (entry->d_name)) {
-
- is_directory_empty = _gf_false;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup %s => %"PRId64,
- entry_loc.path,
- iatt.ia_ino);
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: lookup failed",
- entry_loc.path);
- continue;
- }
- afr_fill_loc_info (&entry_loc, &iatt,
- &parent);
-
- pump_update_resume_state (this, entry_loc.path);
-
- pump_save_path (this, entry_loc.path);
- pump_save_file_stats (this, entry_loc.path);
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump beginning to exit out");
- goto out;
- }
-
- if (IA_ISDIR (iatt.ia_type)) {
- if (is_pump_traversal_allowed (this, entry_loc.path)) {
- gf_log (this->name, GF_LOG_TRACE,
- "entering dir=%s",
- entry->d_name);
- gf_pump_traverse_directory (&entry_loc);
- }
- }
- }
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ is_directory_empty = _gf_false;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = afr_selfheal_name (this, loc->gfid, entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: name self-heal failed (%s/%s)",
+ entry_loc.path, uuid_utoa (loc->gfid),
+ entry->d_name);
+ continue;
+ }
+
+ ret = afr_selfheal (this, iatt.ia_gfid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: self-heal failed (%s)",
+ entry_loc.path, uuid_utoa (iatt.ia_gfid));
+ continue;
+ }
+
+ pump_fill_loc_info (&entry_loc, &iatt, &parent);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this, entry_loc.path);
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "entering dir=%s", entry->d_name);
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
}
gf_dirent_free (&entries);
free_entries = _gf_false;
- gf_log (this->name, GF_LOG_TRACE,
- "offset incremented to %d",
+ gf_log (this->name, GF_LOG_TRACE, "offset incremented to %d",
(int32_t ) offset);
}
- if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
+ if (is_directory_empty && (strcmp (loc->path, "/") == 0)) {
pump_change_state (this, PUMP_STATE_RUNNING);
gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
"Nothing to be done.");
@@ -453,7 +583,6 @@ out:
if (free_entries)
gf_dirent_free (&entries);
return 0;
-
}
static int
@@ -481,7 +610,7 @@ pump_update_resume_path (xlator_t *this)
static int32_t
pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_private_t *priv = NULL;
loc_t loc = {0};
@@ -492,22 +621,24 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
- PUMP_PATH);
+ PUMP_PATH, 0);
ret = syncop_removexattr (priv->children[sink], &loc,
- PUMP_SINK_COMPLETE);
+ PUMP_SINK_COMPLETE, 0);
for (i = 0; i < priv->child_count; i++) {
ret = syncop_removexattr (priv->children[i], &loc,
- PUMP_SOURCE_COMPLETE);
- if (ret)
+ PUMP_SOURCE_COMPLETE, 0);
+ if (ret) {
gf_log (this->name, GF_LOG_DEBUG, "removexattr "
- "failed with %s", strerror (errno));
+ "failed with %s", strerror (-ret));
+ }
}
+ loc_wipe (&loc);
return pump_command_reply (frame, this);
}
@@ -527,7 +658,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
dict = dict_new ();
@@ -569,6 +700,7 @@ pump_complete_migration (xlator_t *this)
call_resume (pump_priv->cleaner);
}
+ loc_wipe (&loc);
return 0;
}
@@ -595,6 +727,7 @@ pump_lookup_sink (loc_t *loc)
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"Lookup on sink child failed");
+ ret = -1;
goto out;
}
@@ -624,7 +757,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -662,6 +795,7 @@ out:
if (xattr_req)
dict_unref (xattr_req);
+ loc_wipe (&loc);
return 0;
}
@@ -695,7 +829,7 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
priv = this->private;
pump_priv = priv->pump_private;
- pump_frame->root->lk_owner = (uint64_t) (unsigned long)pump_frame->root;
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
pump_pid = (uint64_t) (unsigned long)pump_frame->root;
ret = synctask_new (pump_priv->env, pump_task,
@@ -709,8 +843,8 @@ pump_start (call_frame_t *pump_frame, xlator_t *this)
}
gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %"PRIu64" %"PRIu64,
- pump_frame->root->lk_owner, pump_pid);
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
priv->use_afr_in_pump = 1;
out:
@@ -744,7 +878,7 @@ pump_cmd_start_setxattr_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
call_frame_t *prev = NULL;
@@ -796,9 +930,9 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
- data = data_ref (dict_get (local->dict, PUMP_CMD_START));
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
if (!data) {
ret = -1;
gf_log (this->name, GF_LOG_ERROR,
@@ -837,7 +971,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
PUMP_SINK_CHILD(this)->fops->setxattr,
&loc,
dict,
- 0);
+ 0, NULL);
ret = 0;
@@ -851,6 +985,7 @@ out:
if (ret && clnt_cmd)
GF_FREE (clnt_cmd);
+ loc_wipe (&loc);
return ret;
}
@@ -870,7 +1005,7 @@ pump_cmd_start_getxattr_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_local_t *local = NULL;
char *path = NULL;
@@ -937,6 +1072,7 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
uint64_t number_files = 0;
char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
char *dict_str = NULL;
int32_t op_ret = 0;
@@ -965,16 +1101,19 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
}
if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
} else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
}
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
dict = dict_new ();
- ret = dict_set_dynstr (dict, PUMP_CMD_STATUS, dict_str);
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_set_dynstr returned negative value");
@@ -986,13 +1125,12 @@ pump_execute_status (call_frame_t *frame, xlator_t *this)
out:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
- if (dict_str)
- GF_FREE (dict_str);
+ GF_FREE (dict_str);
return 0;
}
@@ -1034,14 +1172,14 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- afr_build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (this, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
PUMP_SOURCE_CHILD(this),
PUMP_SOURCE_CHILD(this)->fops->getxattr,
&loc,
- PUMP_PATH);
+ PUMP_PATH, NULL);
ret = 0;
@@ -1051,6 +1189,7 @@ out:
pump_command_reply (frame, this);
}
+ loc_wipe (&loc);
return 0;
}
@@ -1058,7 +1197,7 @@ static int
pump_cleanup_helper (void *data) {
call_frame_t *frame = data;
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0);
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
return 0;
}
@@ -1140,7 +1279,7 @@ pump_execute_abort (call_frame_t *frame, xlator_t *this)
} else {
pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
pump_xattr_cleaner,
- 0, 0);
+ 0, 0, NULL);
}
return 0;
@@ -1153,7 +1292,7 @@ pump_command_status (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_STATUS, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump status command");
@@ -1177,7 +1316,7 @@ pump_command_pause (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_PAUSE, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump pause command");
@@ -1201,7 +1340,7 @@ pump_command_commit (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_COMMIT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump commit command");
@@ -1225,7 +1364,7 @@ pump_command_abort (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_ABORT, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump abort command");
@@ -1249,7 +1388,7 @@ pump_command_start (xlator_t *this, dict_t *dict)
int dict_ret = -1;
int ret = _gf_true;
- dict_ret = dict_get_str (dict, PUMP_CMD_START, &cmd);
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
if (dict_ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"Not a pump start command");
@@ -1266,135 +1405,22 @@ out:
}
-struct _xattr_key {
- char *key;
- struct list_head list;
-};
-
-static void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
-{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
-
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
-
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
-}
-
-static void
-__filter_xattrs (dict_t *dict)
-{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
-
- INIT_LIST_HEAD (&keys);
-
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
-
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
-
- list_del_init (&key->list);
-
- GF_FREE (key);
- }
-}
-
-int32_t
-pump_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name);
- }
-
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
- }
-
- return 0;
-}
-
-int32_t
-pump_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+int
+pump_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int ret = 0;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ priv = this->private;
- op_ret = AFR_LOCAL_INIT (local, priv);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_getxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->getxattr,
+ loc, name, xdata);
+ return 0;
}
if (name) {
@@ -1405,185 +1431,21 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (!strcmp (name, PUMP_CMD_STATUS)) {
+ if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
gf_log (this->name, GF_LOG_DEBUG,
"Hit pump command - status");
pump_execute_status (frame, this);
- op_ret = 0;
+ ret = 0;
goto out;
}
}
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_getxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->getxattr,
- loc, name);
- return 0;
- }
-
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_getxattr (frame, this, loc, name, xdata);
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
-
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name);
-
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-static int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno)
- }
- return 0;
-}
-
-static int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-static int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-static int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int32_t
-pump_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1601,120 +1463,85 @@ pump_command_reply (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_INFO,
"Command succeeded");
- dict_unref (local->dict);
-
AFR_STACK_UNWIND (setxattr,
frame,
local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
int
-pump_parse_command (call_frame_t *frame, xlator_t *this,
- afr_local_t *local, dict_t *dict)
+pump_parse_command (call_frame_t *frame, xlator_t *this, dict_t *dict,
+ int *op_errno_p)
{
-
+ afr_local_t *local = NULL;
int ret = -1;
+ int op_errno = 0;
if (pump_command_start (this, dict)) {
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->dict = dict_ref (dict);
ret = pump_execute_start (frame, this);
} else if (pump_command_pause (this, dict)) {
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->dict = dict_ref (dict);
ret = pump_execute_pause (frame, this);
} else if (pump_command_abort (this, dict)) {
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->dict = dict_ref (dict);
ret = pump_execute_abort (frame, this);
} else if (pump_command_commit (this, dict)) {
- frame->local = local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->dict = dict_ref (dict);
ret = pump_execute_commit (frame, this);
}
+out:
+ if (op_errno_p)
+ *op_errno_p = op_errno;
return ret;
}
int
-pump_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+pump_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
-
+ afr_private_t *priv = NULL;
int ret = -1;
-
- int op_ret = -1;
int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict, op_errno, out);
priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- ret = pump_parse_command (frame, this,
- local, dict);
- if (ret >= 0) {
- op_ret = 0;
- goto out;
- }
-
if (!priv->use_afr_in_pump) {
STACK_WIND (frame, default_setxattr_cbk,
FIRST_CHILD (this),
(FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
-
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
-
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ ret = pump_parse_command (frame, this, dict, &op_errno);
+ if (ret >= 0)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_setxattr (frame, this, loc, dict, flags, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ if (ret < 0) {
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
}
return 0;
@@ -1748,7 +1575,7 @@ static int32_t
pump_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1758,11 +1585,11 @@ pump_truncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
- offset);
+ offset, xdata);
return 0;
}
- afr_truncate (frame, this, loc, offset);
+ afr_truncate (frame, this, loc, offset, xdata);
return 0;
}
@@ -1771,7 +1598,7 @@ static int32_t
pump_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1781,11 +1608,11 @@ pump_ftruncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
fd,
- offset);
+ offset, xdata);
return 0;
}
- afr_ftruncate (frame, this, fd, offset);
+ afr_ftruncate (frame, this, fd, offset, xdata);
return 0;
}
@@ -1794,7 +1621,7 @@ pump_ftruncate (call_frame_t *frame,
int
pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *parms)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1802,10 +1629,10 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, parms);
+ loc, mode, rdev, umask, xdata);
return 0;
}
- afr_mknod (frame, this, loc, mode, rdev, parms);
+ afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
return 0;
}
@@ -1814,7 +1641,7 @@ pump_mknod (call_frame_t *frame, xlator_t *this,
int
pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1822,10 +1649,10 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
- afr_mkdir (frame, this, loc, mode, params);
+ afr_mkdir (frame, this, loc, mode, umask, xdata);
return 0;
}
@@ -1834,7 +1661,7 @@ pump_mkdir (call_frame_t *frame, xlator_t *this,
static int32_t
pump_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1843,10 +1670,10 @@ pump_unlink (call_frame_t *frame,
default_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
- afr_unlink (frame, this, loc);
+ afr_unlink (frame, this, loc, xflag, xdata);
return 0;
}
@@ -1854,7 +1681,7 @@ pump_unlink (call_frame_t *frame,
static int
pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
@@ -1864,11 +1691,11 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
- afr_rmdir (frame, this, loc, flags);
+ afr_rmdir (frame, this, loc, flags, xdata);
return 0;
}
@@ -1877,7 +1704,7 @@ pump_rmdir (call_frame_t *frame, xlator_t *this,
int
pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1885,10 +1712,10 @@ pump_symlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
- afr_symlink (frame, this, linkpath, loc, params);
+ afr_symlink (frame, this, linkpath, loc, umask, xdata);
return 0;
}
@@ -1898,7 +1725,7 @@ static int32_t
pump_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1907,10 +1734,10 @@ pump_rename (call_frame_t *frame,
default_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_rename (frame, this, oldloc, newloc);
+ afr_rename (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -1920,7 +1747,7 @@ static int32_t
pump_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1929,10 +1756,10 @@ pump_link (call_frame_t *frame,
default_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
- afr_link (frame, this, oldloc, newloc);
+ afr_link (frame, this, oldloc, newloc, xdata);
return 0;
}
@@ -1941,7 +1768,7 @@ pump_link (call_frame_t *frame,
static int32_t
pump_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1949,10 +1776,10 @@ pump_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
- afr_create (frame, this, loc, flags, mode, fd, params);
+ afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
return 0;
}
@@ -1962,8 +1789,7 @@ static int32_t
pump_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd,
- int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -1972,10 +1798,10 @@ pump_open (call_frame_t *frame,
default_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
- afr_open (frame, this, loc, flags, fd, wbflags);
+ afr_open (frame, this, loc, flags, fd, xdata);
return 0;
}
@@ -1987,8 +1813,8 @@ pump_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref)
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2000,20 +1826,20 @@ pump_writev (call_frame_t *frame,
fd,
vector,
count,
- off,
- iobref);
+ off, flags,
+ iobref, xdata);
return 0;
}
- afr_writev (frame, this, fd, vector, count, off, iobref);
- return 0;
+ afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
static int32_t
pump_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2022,10 +1848,10 @@ pump_flush (call_frame_t *frame,
default_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
- afr_flush (frame, this, fd);
+ afr_flush (frame, this, fd, xdata);
return 0;
}
@@ -2035,7 +1861,7 @@ static int32_t
pump_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2045,10 +1871,10 @@ pump_fsync (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsync (frame, this, fd, flags);
+ afr_fsync (frame, this, fd, flags, xdata);
return 0;
}
@@ -2057,7 +1883,7 @@ pump_fsync (call_frame_t *frame,
static int32_t
pump_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2066,10 +1892,10 @@ pump_opendir (call_frame_t *frame,
default_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
- afr_opendir (frame, this, loc, fd);
+ afr_opendir (frame, this, loc, fd, xdata);
return 0;
}
@@ -2079,7 +1905,7 @@ static int32_t
pump_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2089,10 +1915,10 @@ pump_fsyncdir (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
fd,
- flags);
+ flags, xdata);
return 0;
}
- afr_fsyncdir (frame, this, fd, flags);
+ afr_fsyncdir (frame, this, fd, flags, xdata);
return 0;
}
@@ -2103,7 +1929,7 @@ pump_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2114,10 +1940,10 @@ pump_xattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->xattrop,
loc,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_xattrop (frame, this, loc, flags, dict);
+ afr_xattrop (frame, this, loc, flags, dict, xdata);
return 0;
}
@@ -2127,7 +1953,7 @@ pump_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2138,10 +1964,10 @@ pump_fxattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->fxattrop,
fd,
flags,
- dict);
+ dict, xdata);
return 0;
}
- afr_fxattrop (frame, this, fd, flags, dict);
+ afr_fxattrop (frame, this, fd, flags, dict, xdata);
return 0;
}
@@ -2151,9 +1977,17 @@ static int32_t
pump_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+ name, op_errno, out);
+
+ op_errno = 0;
priv = this->private;
if (!priv->use_afr_in_pump) {
STACK_WIND (frame,
@@ -2161,10 +1995,14 @@ pump_removexattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
loc,
- name);
+ name, xdata);
return 0;
}
- afr_removexattr (frame, this, loc, name);
+ afr_removexattr (frame, this, loc, name, xdata);
+
+ out:
+ if (op_errno)
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2176,7 +2014,7 @@ pump_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2185,21 +2023,18 @@ pump_readdir (call_frame_t *frame,
default_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
- afr_readdir (frame, this, fd, size, off);
+ afr_readdir (frame, this, fd, size, off, xdata);
return 0;
}
static int32_t
-pump_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2208,10 +2043,10 @@ pump_readdirp (call_frame_t *frame,
default_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
- afr_readdirp (frame, this, fd, size, off);
+ afr_readdirp (frame, this, fd, size, off, dict);
return 0;
}
@@ -2242,13 +2077,24 @@ pump_release (xlator_t *this,
}
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_forget (this, inode);
+
+ return 0;
+}
static int32_t
pump_setattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2257,10 +2103,10 @@ pump_setattr (call_frame_t *frame,
default_setattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
- afr_setattr (frame, this, loc, stbuf, valid);
+ afr_setattr (frame, this, loc, stbuf, valid, xdata);
return 0;
}
@@ -2271,7 +2117,7 @@ pump_fsetattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
afr_private_t *priv = NULL;
priv = this->private;
@@ -2280,10 +2126,10 @@ pump_fsetattr (call_frame_t *frame,
default_fsetattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
- afr_fsetattr (frame, this, fd, stbuf, valid);
+ afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
return 0;
}
@@ -2332,7 +2178,7 @@ notify (xlator_t *this, int32_t event,
child_xl = (xlator_t *) data;
- ret = afr_notify (this, event, data);
+ ret = afr_notify (this, event, data, NULL);
switch (event) {
case GF_EVENT_CHILD_DOWN:
@@ -2362,7 +2208,7 @@ int32_t
init (xlator_t *this)
{
afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
+ pump_private_t *pump_priv = NULL;
int child_count = 0;
xlator_list_t * trav = NULL;
int i = 0;
@@ -2383,9 +2229,20 @@ init (xlator_t *this)
"Volume is dangling.");
}
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
+ priv = GF_CALLOC (1, sizeof (afr_private_t), gf_afr_mt_afr_private_t);
+ if (!priv)
+ goto out;
- priv = this->private;
+ LOCK_INIT (&priv->lock);
+
+ child_count = xlator_subvolume_count (this);
+ if (child_count != 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "There should be exactly 2 children - one source "
+ "and one sink");
+ return -1;
+ }
+ priv->child_count = child_count;
priv->read_child = source_child;
priv->favorite_child = source_child;
@@ -2395,14 +2252,13 @@ init (xlator_t *this)
priv->metadata_self_heal = 1;
priv->entry_self_heal = 1;
- priv->data_self_heal_algorithm = "";
-
priv->data_self_heal_window_size = 16;
priv->data_change_log = 1;
priv->metadata_change_log = 1;
priv->entry_change_log = 1;
priv->use_afr_in_pump = 1;
+ priv->sh_readdir_size = 65536;
/* Locking options */
@@ -2411,31 +2267,6 @@ init (xlator_t *this)
and the sink.
*/
- priv->data_lock_server_count = 2;
- priv->metadata_lock_server_count = 2;
- priv->entry_lock_server_count = 2;
-
- priv->strict_readdir = _gf_false;
-
- trav = this->children;
- while (trav) {
- child_count++;
- trav = trav->next;
- }
-
- priv->wait_count = 1;
-
- if (child_count != 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "There should be exactly 2 children - one source "
- "and one sink");
- return -1;
- }
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
-
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
if (!priv->child_up) {
@@ -2469,7 +2300,8 @@ init (xlator_t *this)
while (i < child_count) {
priv->children[i] = trav->xlator;
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+ AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2482,7 +2314,12 @@ init (xlator_t *this)
i++;
}
- priv->first_lookup = 1;
+ ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
priv->root_inode = NULL;
priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
@@ -2507,13 +2344,12 @@ init (xlator_t *this)
pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
gf_afr_mt_char);
if (!pump_priv->resume_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
ret = -1;
goto out;
}
- pump_priv->env = syncenv_new (0);
+ pump_priv->env = this->ctx->env;
if (!pump_priv->env) {
gf_log (this->name, GF_LOG_ERROR,
"Could not create new sync-environment");
@@ -2521,21 +2357,67 @@ init (xlator_t *this)
goto out;
}
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
priv->pump_private = pump_priv;
+ pump_priv = NULL;
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ this->private = priv;
+ priv = NULL;
pump_change_state (this, PUMP_STATE_ABORT);
ret = 0;
out:
+
+ if (pump_priv) {
+ GF_FREE (pump_priv->resume_path);
+ LOCK_DESTROY (&pump_priv->resume_path_lock);
+ LOCK_DESTROY (&pump_priv->pump_state_lock);
+ GF_FREE (pump_priv);
+ }
+
+ if (priv) {
+ GF_FREE (priv->child_up);
+ GF_FREE (priv->children);
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->last_event);
+ LOCK_DESTROY (&priv->lock);
+ GF_FREE (priv);
+ }
+
return ret;
}
int
fini (xlator_t *this)
{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ if (!priv)
+ goto out;
+
+ pump_priv = priv->pump_private;
+ if (!pump_priv)
+ goto afr_priv;
+
+ GF_FREE (pump_priv->resume_path);
+ LOCK_DESTROY (&pump_priv->resume_path_lock);
+ LOCK_DESTROY (&pump_priv->pump_state_lock);
+ GF_FREE (pump_priv);
+afr_priv:
+ afr_priv_destroy (priv);
+out:
return 0;
}
@@ -2583,6 +2465,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.release = pump_release,
.releasedir = pump_releasedir,
+ .forget = pump_forget,
};
struct volume_options options[] = {
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
index 02eede49c..9d0b6db6a 100644
--- a/xlators/cluster/afr/src/pump.h
+++ b/xlators/cluster/afr/src/pump.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __PUMP_H__
@@ -26,12 +17,6 @@
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-#define PUMP_CMD_START "trusted.glusterfs.pump.start"
-#define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit"
-#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort"
-#define PUMP_CMD_PAUSE "trusted.glusterfs.pump.pause"
-#define PUMP_CMD_STATUS "trusted.glusterfs.pump.status"
-
#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
@@ -50,7 +35,7 @@ typedef enum {
typedef struct _pump_private {
struct syncenv *env; /* The env pointer to the pump synctask */
- const char *resume_path; /* path to resume from the last pause */
+ char *resume_path; /* path to resume from the last pause */
gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
pump_state_t pump_state; /* State of pump */
@@ -90,4 +75,7 @@ pump_command_status (xlator_t *this, dict_t *dict);
int
pump_execute_status (call_frame_t *frame, xlator_t *this);
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this);
+
#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index e35058d65..174bea841 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -4,7 +4,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- dht-common.c dht-inode-write.c dht-inode-read.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
$(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
@@ -12,22 +12,23 @@ dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module -avoid-version
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = dht-common.h dht-mem-types.h \
$(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
uninstall-local:
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index fb149e763..3868fc38f 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -31,17 +22,18 @@
#include "dht-common.h"
#include "defaults.h"
#include "byte-order.h"
+#include "glusterfs-acl.h"
#include <sys/time.h>
#include <libgen.h>
-void
+int
dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
int64_t *ptr = 0, *size = NULL;
int32_t ret = -1;
- data_pair_t *data_pair = NULL;
+ data_t *dict_data = NULL;
dst = data;
@@ -53,32 +45,37 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
if (size == NULL) {
gf_log ("dht", GF_LOG_WARNING,
"memory allocation failed");
- return;
+ return -1;
}
ret = dict_set_bin (dst, key, size, sizeof (int64_t));
if (ret < 0) {
gf_log ("dht", GF_LOG_WARNING,
"dht aggregate dict set failed");
GF_FREE (size);
- return;
+ return -1;
}
}
ptr = data_to_bin (value);
if (ptr == NULL) {
gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
- return;
+ return -1;
}
*size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime (THIS, dst, key, value);
+ if (ret < 0)
+ return ret;
} else {
/* compare user xattrs only */
if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &data_pair);
- if (!ret && data) {
- ret = is_data_equal (data_pair->value, value);
+ ret = dict_lookup (dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal (dict_data, value);
if (!ret)
- gf_log ("dht", GF_LOG_WARNING,
+ gf_log ("dht", GF_LOG_DEBUG,
"xattr mismatch for %s", key);
}
}
@@ -87,7 +84,7 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
}
- return;
+ return 0;
}
@@ -114,7 +111,7 @@ out:
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -134,7 +131,10 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
ret = dht_layout_set (this, local->inode, layout);
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
@@ -147,6 +147,256 @@ out:
int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
+
+ if (!main_frame)
+ return 0;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset (this, local->cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)", local->loc.path,
+ (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+ if ((ret > 0) && (ret == conf->subvolume_cnt)) {
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (local->inode)
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return ret;
+}
+
+
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
+
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (is_dir) {
+ local->dir_count ++;
+ } else {
+ local->file_count ++;
+
+ if (!is_linkfile) {
+ /* real file */
+ local->cached_subvol = prev->this;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
+
+ local->op_ret = 0;
+
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt) || attempt_unwind) {
+ dht_discover_complete (this, frame);
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->link_xattr_name);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ uuid_copy (local->gfid, loc->gfid);
+
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+
+int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, dict_t *xattr,
@@ -191,7 +441,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_ret, op_errno, xattr);
if (op_ret == -1) {
- local->op_errno = ENOENT;
+ local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"lookup of %s on %s returned error (%s)",
local->loc.path, prev->this->name,
@@ -250,6 +500,11 @@ unlock:
dht_layout_set (this, local->inode, layout);
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
@@ -281,6 +536,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
int is_dir = 0;
int is_linkfile = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -339,7 +596,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
layout = local->layout;
is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (is_linkfile) {
gf_log (this->name, GF_LOG_INFO,
@@ -351,6 +609,23 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
ret = dht_layout_dir_mismatch (this, layout,
prev->this, &local->loc,
xattr);
@@ -389,7 +664,28 @@ out:
&& (conf && conf->unhashed_sticky_bit)) {
local->stbuf.ia_prot.sticky = 1;
}
- if (local->layout_mismatch) {
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
+ if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
dht_layout_unref (this, local->layout);
@@ -415,7 +711,10 @@ out:
local->op_errno = ESTALE;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
@@ -433,7 +732,8 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -450,7 +750,7 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
cached_subvol = local->cached_subvol;
conf = this->private;
- ret = dht_layout_preset (this, local->cached_subvol, inode);
+ ret = dht_layout_preset (this, local->cached_subvol, local->loc.inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"failed to set layout for subvolume %s",
@@ -466,8 +766,14 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
local->stbuf.ia_prot.sticky = 1;
}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
unwind:
- WIPE (&local->postparent);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
@@ -540,7 +846,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
"<nil>"));
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret,
@@ -570,7 +879,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
}
- WIPE (&local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
DHT_STACK_UNWIND (lookup, frame, local->op_ret,
@@ -586,7 +898,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
hashed_subvol->name);
ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
+ dht_lookup_linkfile_create_cbk, this,
cached_subvol, hashed_subvol, &local->loc);
return ret;
@@ -596,7 +908,8 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
int
dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
int this_call_cnt = 0;
@@ -623,8 +936,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *subvol = NULL;
loc_t *loc = NULL;
xlator_t *link_subvol = NULL;
- int ret = -1;
- int32_t fd_count = 0;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -634,6 +948,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
+ conf = this->private;
prev = cookie;
subvol = prev->this;
@@ -655,7 +970,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc->path, prev->this->name);
}
- is_linkfile = check_is_linkfile (inode, buf, xattr);
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, buf, xattr);
if (is_linkfile) {
@@ -715,7 +1031,7 @@ unlock:
"deleting stale linkfile %s on %s",
loc->path, subvol->name);
STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc);
+ subvol, subvol->fops->unlink, loc, 0, NULL);
return 0;
}
}
@@ -797,7 +1113,16 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) failed (%s)",
local->loc.path, subvol->name, strerror (op_errno));
- goto err;
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
}
if (check_is_dir (inode, stbuf, xattr)) {
@@ -807,7 +1132,7 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached link",
local->loc.path, subvol->name);
@@ -835,9 +1160,12 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
op_errno = EINVAL;
}
-unwind:
- WIPE (postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+unwind:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -919,7 +1247,6 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
call_frame_t *prev = NULL;
int ret = 0;
- uint64_t tmp_layout = 0;
dht_layout_t *parent_layout = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
@@ -950,8 +1277,10 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
(loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
if (parent_layout->search_unhashed) {
local->op_errno = ENOENT;
dht_lookup_everywhere (frame, this, loc);
@@ -980,7 +1309,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
if (!is_linkfile) {
/* non-directory and not a linkfile */
@@ -1020,7 +1350,10 @@ out:
* from each of the subvolume. See dht_iatt_merge for reference.
*/
- WIPE (postparent);
+ if (!op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
@@ -1029,6 +1362,39 @@ err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -1049,7 +1415,6 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
if (!conf)
@@ -1086,6 +1451,12 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
local->xattr_req = dict_new ();
}
+ if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
if (!hashed_subvol)
hashed_subvol = dht_subvol_get_hashed (this, loc);
@@ -1118,7 +1489,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (IA_ISDIR (local->inode->ia_type)) {
local->call_cnt = call_cnt = conf->subvolume_cnt;
@@ -1138,6 +1509,9 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
ret = dict_set_uint32 (local->xattr_req,
GLUSTERFS_OPEN_FD_COUNT, 4);
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
for (i = 0; i < call_cnt; i++) {
subvol = layout->list[i].xlator;
@@ -1150,16 +1524,19 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
do_fresh_lookup:
/* TODO: remove the hard-coding */
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
ret = dict_set_uint32 (local->xattr_req,
- DHT_LINKFILE_KEY, 256);
+ conf->link_xattr_name, 256);
/* need it for self-healing linkfiles which is
'in-migration' state */
ret = dict_set_uint32 (local->xattr_req,
GLUSTERFS_OPEN_FD_COUNT, 4);
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no subvolume in layout for path=%s, "
@@ -1193,7 +1570,8 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1201,7 +1579,7 @@ err:
int
dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1225,14 +1603,18 @@ dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent = *postparent;
local->preparent = *preparent;
- WIPE (&local->postparent);
- WIPE (&local->preparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
}
unlock:
UNLOCK (&frame->lock);
DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -1241,7 +1623,7 @@ unlock:
int
dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1253,7 +1635,8 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
- if (op_ret == -1) {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
@@ -1266,7 +1649,7 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unlock:
UNLOCK (&frame->lock);
- if (op_ret == -1)
+ if (local->op_ret == -1)
goto err;
cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
@@ -1280,53 +1663,19 @@ unlock:
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, local->flags, NULL);
return 0;
err:
DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL);
- return 0;
-}
-
-static int
-dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
- }
-
+ NULL, NULL, NULL);
return 0;
}
-
int
dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -1352,7 +1701,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
}
return 0;
@@ -1375,114 +1725,252 @@ fill_layout_info (dht_layout_t *layout, char *buf)
}
}
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
+{
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
+}
+
int
-dht_pathinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
+{
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
+
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Subvolume %s returned -1 (%s)", this->name,
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->alloc_len += strlen(value);
+
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
+
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
+
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
+
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown local->xsel (%s)", local->xsel);
+ GF_FREE (xattr_buf);
+ goto out;
+ }
+
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ if (ret)
+ GF_FREE (xattr_buf);
+ GF_FREE (local->xattr_val);
+
+ out:
+ return ret;
+}
+
+int
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_local_t *local = NULL;
int ret = 0;
- int flag = 0;
+ dht_local_t *local = NULL;
int this_call_cnt = 0;
- char *value_got = NULL;
- char layout_buf[8192] = {0,};
- char *xattr_buf = NULL;
dict_t *dict = NULL;
- int32_t alloc_len = 0;
- int32_t plen = 0;
- local = frame->local;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value_got);
- if (!ret) {
- alloc_len = strlen (value_got);
+ local = frame->local;
- /**
- * allocate the buffer:- we allocate 10 bytes extra in case we need to
- * append ' Link: ' in the buffer for another STACK_WIND
- */
- if (!local->pathinfo) {
- alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->pathinfo = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr err (%s) for dir",
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
}
- if (local->pathinfo) {
- plen = strlen (local->pathinfo);
- if (plen) {
- /* extra byte(s) for \0 to be safe */
- alloc_len += (plen + 2);
- local->pathinfo = GF_REALLOC (local->pathinfo,
- alloc_len);
- if (!local->pathinfo)
- goto out;
- }
-
- strcat (local->pathinfo, value_got);
- }
+ goto unlock;
}
+
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
}
+ unlock:
+ UNLOCK (&frame->lock);
- out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->layout->cnt > 1) {
- /* Set it for directory */
- fill_layout_info (local->layout, layout_buf);
- flag = 1;
- }
+ if (!is_last_call (this_call_cnt))
+ goto out;
- dict = dict_new ();
+ /* -- last call: do patch ups -- */
- /* we would need max-to-max this many bytes to create pathinfo string */
- alloc_len += (2 * strlen (this->name)) + strlen (layout_buf) + 40;
- xattr_buf = GF_CALLOC (alloc_len, sizeof (char), gf_common_mt_char);
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- if (flag && local->pathinfo)
- snprintf (xattr_buf, alloc_len, "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->pathinfo, this->name,
- layout_buf);
- else if (local->pathinfo)
- snprintf (xattr_buf, alloc_len, "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->pathinfo);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
- xattr_buf);
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- if (local->pathinfo)
- GF_FREE (local->pathinfo);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+int
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- if (dict)
- dict_unref (dict);
+ local = frame->local;
+ prev = cookie;
- return 0;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
+ "(%s)", prev->this->name, strerror (op_errno));
+ goto unwind;
}
- if (local->pathinfo)
- strcat (local->pathinfo, " Link: ");
- if (local->hashed_subvol) {
- /* This will happen if there pending */
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, local->hashed_subvol,
- local->hashed_subvol->fops->getxattr,
- &local->loc, local->key);
-
- return 0;
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ goto unwind;
}
- gf_log ("this->name", GF_LOG_ERROR, "Unable to find hashed_subvol for path"
- " %s", local->pathinfo);
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, dict);
return 0;
}
int
dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
int ret = 0;
char *value = NULL;
@@ -1497,21 +1985,24 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
return 0;
}
int
dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int this_call_cnt = 0;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ conf = this->private;
local = frame->local;
this_call_cnt = dht_frame_return (frame);
@@ -1519,42 +2010,106 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!xattr || (op_ret == -1))
goto out;
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
+ }
+
+ if (frame->root->pid >= 0 ) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
}
+
local->op_ret = 0;
if (!local->xattr) {
local->xattr = dict_copy_with_ref (xattr, NULL);
} else {
- /* first aggregate everything into xattr and then copy into
- * local->xattr. This is required as we want to have
- * 'local->xattr' as the proper final dictionary passed above
- * distribute xlator.
- */
- dht_aggregate_xattr (xattr, local->xattr);
- local->xattr = dict_copy (xattr, local->xattr);
+ dht_aggregate_xattr (local->xattr, xattr);
}
out:
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, local->xattr);
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
}
return 0;
}
int32_t
dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+
+
+ local = frame->local;
+
+ if (op_ret != -1) {
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+ local->xattr_req = dict_ref (xdata);
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
+
+
+int
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
+
+
+ local = frame->local;
+ layout = local->layout;
+
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
+
+ return 0;
+}
+
+
+int
dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
+
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
xlator_t *cached_subvol = NULL;
@@ -1570,7 +2125,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
@@ -1598,24 +2152,67 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+ if (key && DHT_IS_DIR(layout) &&
+ (XATTR_IS_PATHINFO (key)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
+ }
+
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || XATTR_IS_PATHINFO (key))) {
cached_subvol = local->cached_subvol;
+ (void) strncpy (local->xsel, key, 256);
local->call_cnt = 1;
- if (hashed_subvol != cached_subvol) {
- local->call_cnt = 2;
- local->hashed_subvol = hashed_subvol;
- }
-
- STACK_WIND (frame, dht_pathinfo_getxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key);
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, NULL);
return 0;
}
+
if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
if (hashed_subvol == cached_subvol) {
op_errno = ENODATA;
goto err;
@@ -1623,7 +2220,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (hashed_subvol) {
STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY);
+ GF_XATTR_PATHINFO_KEY, NULL);
return 0;
}
op_errno = ENODATA;
@@ -1631,13 +2228,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
- && (-1 == frame->root->pid)) {
-
- if (loc->inode-> ia_type == IA_IFDIR) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
}
+
sub_volumes = alloca ( cnt * sizeof (xlator_t *));
for (i = 0; i < cnt; i++)
*(sub_volumes + i) = layout->list[i].xlator;
@@ -1645,7 +2242,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, key,
local, dht_getxattr_unwind,
sub_volumes, cnt,
- MARKER_UUID_TYPE, conf->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -1653,10 +2251,22 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
+ if (key && !strcmp (GF_XATTR_QUOTA_LIMIT_LIST, key)) {
+ /* quota hardlimit and aggregated size of a directory is stored
+ * in inode contexts of each brick. Hence its good enough that
+ * we send getxattr for this key to any brick.
+ */
+ local->call_cnt = 1;
+ subvol = dht_first_up_subvol (this);
+ STACK_WIND (frame, dht_getxattr_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ return 0;
+ }
+
if (key && *conf->vol_uuid) {
if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
- (-1 == frame->root->pid)) {
- if (loc->inode-> ia_type == IA_IFDIR) {
+ (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
cnt = layout->cnt;
} else {
cnt = 1;
@@ -1669,6 +2279,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
local, dht_getxattr_unwind,
sub_volumes, cnt,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
conf->vol_uuid)) {
op_errno = EINVAL;
goto err;
@@ -1678,7 +2289,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (loc->inode-> ia_type == IA_IFDIR) {
+ if (DHT_IS_DIR(layout)) {
cnt = local->call_cnt = layout->cnt;
} else {
cnt = local->call_cnt = 1;
@@ -1688,29 +2299,101 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_getxattr_cbk,
subvol, subvol->fops->getxattr,
- loc, key);
+ loc, key, NULL);
+ }
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+#undef DHT_IS_DIR
+
+int
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && key
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY) != 0))) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags)
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
if (!local) {
@@ -1729,13 +2412,13 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = 1;
STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags);
+ fd, xattr, flags, NULL);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1743,16 +2426,18 @@ err:
static int
dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
int i = -1;
int ret = -1;
@@ -1784,7 +2469,7 @@ dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP);
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
}
return 0;
@@ -1792,7 +2477,7 @@ out:
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -1804,17 +2489,19 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
data_t *tmp = NULL;
uint32_t dir_spread = 0;
char value[4096] = {0,};
- int forced_rebalance = 0;
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
int call_cnt = 0;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
conf = this->private;
+
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+
local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
if (!local) {
op_errno = ENOMEM;
@@ -1839,25 +2526,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
local->call_cnt = call_cnt = layout->cnt;
- /* This key is sent by Unified File and Object storage
- * to test xattr support in backend.
- */
- tmp = dict_get (xattr, "user.ufo-test");
- if (tmp) {
- if (IA_ISREG (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
- local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_ufo_xattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags);
- }
- return 0;
- }
-
tmp = dict_get (xattr, "distribute.migrate-data");
if (tmp) {
if (IA_ISDIR (loc->inode->ia_type)) {
@@ -1869,9 +2537,20 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(ie, 'target' subvolume given there, etc) */
memcpy (value, tmp->data, tmp->len);
if (strcmp (value, "force") == 0)
- forced_rebalance = 1;
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ if (!local->rebalance.target_node) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
local->rebalance.from_subvol = local->cached_subvol;
if (local->rebalance.target_node == local->rebalance.from_subvol) {
@@ -1911,7 +2590,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_checking_pathinfo_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->getxattr,
- loc, GF_XATTR_PATHINFO_KEY);
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
}
return 0;
}
@@ -1921,9 +2600,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_INFO,
"fixing the layout of %s", loc->path);
- dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
tmp = dict_get (xattr, "distribute.directory-spread-count");
@@ -1935,10 +2618,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(dir_spread > 0))) {
layout->spread_cnt = dir_spread;
- dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
gf_log (this->name, GF_LOG_ERROR,
"wrong 'directory-spread-count' value (%s)", value);
@@ -1950,14 +2637,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_err_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags);
+ loc, xattr, flags, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1965,7 +2652,7 @@ err:
int
dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -1991,7 +2678,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
}
return 0;
@@ -2000,21 +2688,27 @@ unlock:
int
dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+ loc_t *loc, const char *key, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
int call_cnt = 0;
+ dht_conf_t *conf = NULL;
int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
if (!local) {
@@ -2045,14 +2739,78 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_removexattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->removexattr,
- loc, key);
+ loc, key, NULL);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+int
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+
+ int i;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -2060,7 +2818,7 @@ err:
int
dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2087,7 +2845,7 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
+ local->fd, NULL);
return 0;
}
@@ -2117,13 +2875,18 @@ dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
int
dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
+ int8_t quota_deem_statfs = 0;
+ GF_UNUSED int ret = 0;
+ unsigned long new_usage = 0;
+ unsigned long cur_usage = 0;
+ ret = dict_get_int8 (xdata, "quota-deem-statfs", &quota_deem_statfs);
local = frame->local;
@@ -2133,8 +2896,22 @@ dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
goto unlock;
}
+ if (!statvfs) {
+ op_errno = EINVAL;
+ local->op_ret = -1;
+ goto unlock;
+ }
local->op_ret = 0;
+ if (quota_deem_statfs) {
+ new_usage = statvfs->f_blocks - statvfs->f_bfree;
+ cur_usage = local->statvfs.f_blocks - local->statvfs.f_bfree;
+ /* We take the maximux of the usage from the subvols */
+ if (new_usage >= cur_usage)
+ local->statvfs = *statvfs;
+ goto unlock;
+ }
+
if (local->statvfs.f_bsize != 0) {
bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
@@ -2155,6 +2932,7 @@ dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->statvfs.f_flag = statvfs->f_flag;
local->statvfs.f_namemax = statvfs->f_namemax;
+
}
unlock:
UNLOCK (&frame->lock);
@@ -2163,14 +2941,14 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs);
+ &local->statvfs, xdata);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
@@ -2182,7 +2960,6 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
VALIDATE_OR_GOTO (this->private, err);
conf = this->private;
@@ -2199,7 +2976,8 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (frame, dht_statfs_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
}
return 0;
}
@@ -2215,20 +2993,21 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_cnt = 1;
STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc);
+ subvol, subvol->fops->statfs, loc, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -2255,14 +3034,14 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
STACK_WIND (frame, dht_fd_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2270,7 +3049,7 @@ err:
int
dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries)
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2283,6 +3062,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dht_layout_t *layout = 0;
dht_conf_t *conf = NULL;
xlator_t *subvol = 0;
+ int ret = 0;
INIT_LIST_HEAD (&entries.list);
prev = cookie;
@@ -2299,10 +3079,13 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
-
- if (check_is_linkfile_wo_dict (NULL, (&orig_entry->d_stat))
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
+ (prev->this != local->first_up_subvol)) {
+ continue;
+ }
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
continue;
}
@@ -2331,6 +3114,24 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (!IA_ISDIR(entry->d_stat.ia_type) && orig_entry->inode) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to link the layout in inode");
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode, this,
+ &entry->d_stat, 1);
+ }
+
list_add_tail (&entry->list, &entries.list);
count++;
}
@@ -2360,9 +3161,23 @@ done:
goto unwind;
}
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk,
next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset,
+ local->xattr);
return 0;
}
@@ -2370,7 +3185,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -2381,7 +3196,8 @@ unwind:
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
gf_dirent_t entries;
@@ -2458,7 +3274,7 @@ done:
STACK_WIND (frame, dht_readdir_cbk,
next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
+ local->fd, local->size, next_offset, NULL);
return 0;
}
@@ -2466,7 +3282,7 @@ unwind:
if (op_ret < 0)
op_ret = 0;
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free (&entries);
@@ -2476,17 +3292,21 @@ unwind:
int
dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop)
+ off_t yoff, int whichop, dict_t *dict)
{
dht_local_t *local = NULL;
int op_errno = -1;
xlator_t *xvol = NULL;
off_t xoff = 0;
-
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
local = dht_local_init (frame, NULL, NULL, whichop);
if (!local) {
@@ -2496,22 +3316,52 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->fd = fd_ref (fd);
local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol (this);
dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
/* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff);
- else
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref (dict);
+ else
+ local->xattr = dict_new ();
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set '%s' key",
+ conf->link_xattr_name);
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+ }
+
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff);
+ fd, size, xoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff, local->xattr);
+ }
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -2519,7 +3369,7 @@ err:
int
dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *xdata)
{
int op = GF_FOP_READDIR;
dht_conf_t *conf = NULL;
@@ -2540,15 +3390,15 @@ dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
op = GF_FOP_READDIRP;
out:
- dht_do_readdir (frame, this, fd, size, yoff, op);
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
return 0;
}
int
dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
+ off_t yoff, dict_t *dict)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
return 0;
}
@@ -2556,7 +3406,7 @@ dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -2576,14 +3426,16 @@ dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -2610,14 +3462,14 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
STACK_WIND (frame, dht_fsyncdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
@@ -2627,9 +3479,9 @@ int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
+ xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
@@ -2647,19 +3499,24 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
- ret = dht_layout_preset (this, prev->this, inode);
+ ret = dht_layout_preset (this, prev, inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"could not set pre-set layout for subvolume %s",
- prev->this->name);
+ prev? prev->name: NULL);
op_ret = -1;
op_errno = EINVAL;
goto out;
}
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
/*
* FIXME: ia_size and st_blocks of preparent and postparent do not have
@@ -2668,10 +3525,9 @@ out:
* corresponding values from each of the subvolume.
* See dht_iatt_merge for reference.
*/
-
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
return 0;
}
@@ -2680,7 +3536,8 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -2689,22 +3546,28 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
goto err;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
cached_subvol = local->cached_subvol;
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -2736,11 +3599,13 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
if (avail_subvol != subvol) {
/* Choose the minimum filled volume, and create the
files there */
@@ -2749,17 +3614,18 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
local->cached_subvol = avail_subvol;
local->mode = mode;
local->rdev = rdev;
-
+ local->umask = umask;
dht_linkfile_create (frame,
dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
} else {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
}
}
@@ -2768,7 +3634,7 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -2776,7 +3642,7 @@ err:
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, dict_t *params)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -2804,23 +3670,24 @@ dht_symlink (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
@@ -2838,7 +3705,7 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->loc.path, cached_subvol->name, loc->path);
STACK_WIND (frame, dht_unlink_cbk,
cached_subvol, cached_subvol->fops->unlink,
- &local->loc);
+ &local->loc, xflag, xdata);
goto done;
}
@@ -2850,12 +3717,12 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
hashed_subvol = dht_subvol_get_hashed (this, loc);
+ /* Dont fail unlink if hashed_subvol is NULL which can be the result
+ * of layout anomaly */
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
"no subvolume in layout for path=%s",
loc->path);
- op_errno = EINVAL;
- goto err;
}
cached_subvol = local->cached_subvol;
@@ -2866,18 +3733,21 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- if (hashed_subvol != cached_subvol) {
+ local->flags = xflag;
+ if (hashed_subvol && hashed_subvol != cached_subvol) {
STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
+ hashed_subvol, hashed_subvol->fops->unlink, loc,
+ xflag, xdata);
} else {
STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
}
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -2887,13 +3757,16 @@ int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
prev = cookie;
+ local = frame->local;
+
if (op_ret == -1)
goto out;
@@ -2907,13 +3780,20 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- WIPE (preparent);
- WIPE (postparent);
-
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -2923,7 +3803,8 @@ int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *srcvol = NULL;
@@ -2935,14 +3816,14 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
srcvol = local->linkfile.srcvol;
STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xdata);
return 0;
err:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -2950,7 +3831,7 @@ err:
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *cached_subvol = NULL;
xlator_t *hashed_subvol = NULL;
@@ -2995,19 +3876,19 @@ dht_link (call_frame_t *frame, xlator_t *this,
if (hashed_subvol != cached_subvol) {
uuid_copy (local->gfid, oldloc->inode->gfid);
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
cached_subvol, hashed_subvol, newloc);
} else {
STACK_WIND (frame, dht_link_cbk,
cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -3017,7 +3898,7 @@ int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
int ret = -1;
@@ -3036,8 +3917,11 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
}
ret = dht_layout_preset (this, prev->this, inode);
@@ -3049,11 +3933,14 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
goto out;
}
-
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent);
+ postparent, NULL);
return 0;
}
@@ -3063,7 +3950,8 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -3077,18 +3965,19 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
&local->loc, local->flags, local->mode,
- local->fd, local->params);
+ local->umask, local->fd, local->params);
return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
int op_errno = -1;
xlator_t *subvol = NULL;
@@ -3114,7 +4003,7 @@ dht_create (call_frame_t *frame, xlator_t *this,
local->loc.path, subvol->name, loc->path);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- &local->loc, flags, mode, fd, params);
+ &local->loc, flags, mode, umask, fd, params);
goto done;
}
@@ -3132,38 +4021,38 @@ dht_create (call_frame_t *frame, xlator_t *this,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
goto done;
}
/* Choose the minimum filled volume, and create the
files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s (link at %s)", loc->path,
avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
goto done;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
done:
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
@@ -3172,7 +4061,7 @@ err:
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -3183,14 +4072,17 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
if (op_ret == 0) {
dht_layout_set (this, local->inode, layout);
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
}
}
DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
local->inode, &local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -3198,7 +4090,7 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -3219,6 +4111,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST)
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
@@ -3251,7 +4152,8 @@ int
dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = -1;
@@ -3308,19 +4210,20 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
continue;
STACK_WIND (frame, dht_mkdir_cbk,
conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode, local->params);
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
}
return 0;
err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
-int
+ int
dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -3356,6 +4259,7 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
local->hashed_subvol = hashed_subvol;
local->mode = mode;
+ local->umask = umask;
local->params = dict_ref (params);
local->inode = inode_ref (loc->inode);
@@ -3368,13 +4272,14 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_mkdir_hashed_cbk,
hashed_subvol,
hashed_subvol->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -3382,14 +4287,87 @@ err:
int
dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+
+int
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
+
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
return 0;
}
@@ -3398,11 +4376,12 @@ dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
+ int done = 0;
local = frame->local;
prev = cookie;
@@ -3435,7 +4414,16 @@ unlock:
this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
+
+
+ if (done) {
if (local->need_selfheal && local->fop_succeeded) {
local->layout =
dht_layout_get (this, local->loc.inode);
@@ -3446,15 +4434,34 @@ unlock:
uuid_copy (local->gfid, local->loc.inode->gfid);
dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
&local->loc, local->layout);
- } else {
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+
+ local->need_selfheal = 0;
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
+
if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+
}
DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
local->op_errno, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
}
@@ -3468,6 +4475,7 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int i = 0;
+ xlator_t *hashed_subvol = NULL;
VALIDATE_OR_GOTO (this->private, err);
@@ -3479,18 +4487,41 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this)
local->call_cnt = conf->subvolume_cnt;
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
+ "subvol for %s",local->loc.path);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
+
for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
+ continue;
+
STACK_WIND (frame, dht_rmdir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags);
+ &local->loc, local->flags, NULL);
}
return 0;
err:
DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -3498,7 +4529,7 @@ err:
int
dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -3546,6 +4577,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_frame_t *main_frame = NULL;
dht_local_t *main_local = NULL;
int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
prev = cookie;
@@ -3557,7 +4589,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0)
goto err;
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
main_local->op_ret = -1;
main_local->op_errno = ENOTEMPTY;
@@ -3568,7 +4600,74 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc);
+ src, src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
+err:
+
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+
+int
+dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *xattrs = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ src = local->hashed_subvol;
+
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (op_ret == 0) {
+ main_local->op_ret = -1;
+ main_local->op_errno = ENOTEMPTY;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s found on cached subvol %s",
+ local->loc.path, src->name);
+ goto err;
+ } else if (op_errno != ENOENT) {
+ main_local->op_ret = -1;
+ main_local->op_errno = op_errno;
+ goto err;
+ }
+
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ goto err;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+
return 0;
err:
@@ -3585,12 +4684,15 @@ int
dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
gf_dirent_t *entries, xlator_t *src)
{
- int ret = 0;
- int build_ret = 0;
- gf_dirent_t *trav = NULL;
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
call_frame_t *lookup_frame = NULL;
dht_local_t *lookup_local = NULL;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ xlator_t *subvol = NULL;
local = frame->local;
@@ -3599,7 +4701,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
continue;
if (strcmp (trav->d_name, "..") == 0)
continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), NULL) == 1) {
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
ret++;
continue;
}
@@ -3611,6 +4714,21 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
return 0;
}
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
+
list_for_each_entry (trav, &entries->list, list) {
if (strcmp (trav->d_name, ".") == 0)
continue;
@@ -3627,14 +4745,14 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
goto err;
}
- lookup_local = GF_CALLOC (sizeof (*local), 1,
- gf_dht_mt_dht_local_t);
+ lookup_local = mem_get0 (this->local_pool);
if (!lookup_local) {
goto err;
}
lookup_frame->local = lookup_local;
lookup_local->main_frame = frame;
+ lookup_local->hashed_subvol = src;
build_ret = dht_build_child_loc (this, &lookup_local->loc,
&local->loc, trav->d_name);
@@ -3653,14 +4771,31 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&frame->lock);
- STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup,
- &lookup_local->loc, NULL);
+ subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat,
+ trav->dict);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_INFO,
+ "linkfile not having link subvolume. path=%s",
+ lookup_local->loc.path);
+ STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup,
+ &lookup_local->loc, xattrs);
+ } else {
+ STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk,
+ subvol, subvol->fops->lookup,
+ &lookup_local->loc, xattrs);
+ }
ret++;
}
+ if (xattrs)
+ dict_unref (xattrs);
+
return ret;
err:
+ if (xattrs)
+ dict_unref (xattrs);
+
DHT_STACK_DESTROY (lookup_frame);
return 0;
}
@@ -3668,7 +4803,8 @@ err:
int
dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -3712,35 +4848,65 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
int this_call_cnt = -1;
- call_frame_t *prev = NULL;
-
+ call_frame_t *prev = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ int i = 0;
local = frame->local;
prev = cookie;
+ this_call_cnt = dht_frame_return (frame);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"opendir on %s for %s failed (%s)",
prev->this->name, local->loc.path,
strerror (op_errno));
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
+
+ if (!is_last_call (this_call_cnt))
+ return 0;
+
+ if (local->op_ret == -1)
+ goto err;
+
+ dict = dict_new ();
+ if (!dict) {
local->op_ret = -1;
- local->op_errno = op_errno;
+ local->op_errno = ENOMEM;
goto err;
}
- STACK_WIND (frame, dht_rmdir_readdirp_cbk,
- prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0);
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ local->loc.path, conf->link_xattr_name);
+
+ local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_readdirp_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->readdirp,
+ local->fd, 4096, 0, dict);
+ }
+
+ if (dict)
+ dict_unref (dict);
return 0;
err:
- this_call_cnt = dht_frame_return (frame);
-
if (is_last_call (this_call_cnt)) {
dht_rmdir_do (frame, this);
}
@@ -3750,7 +4916,8 @@ err:
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -3789,7 +4956,7 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
STACK_WIND (frame, dht_rmdir_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
+ loc, local->fd, NULL);
}
return 0;
@@ -3797,17 +4964,17 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int
dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -3815,7 +4982,7 @@ dht_entrylk_cbk (call_frame_t *frame, void *cookie,
int
dht_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -3825,7 +4992,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
if (!local) {
@@ -3845,13 +5011,13 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_entrylk_cbk,
subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -3859,10 +5025,10 @@ err:
int
dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -3870,7 +5036,7 @@ dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
int
dht_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -3889,13 +5055,13 @@ dht_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_fentrylk_cbk,
subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
return 0;
}
@@ -3904,16 +5070,21 @@ err:
int
dht_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &tmp_layout);
+ inode_ctx_del (inode, this, &ctx_int);
- if (!tmp_layout)
+ if (!ctx_int)
return 0;
- layout = (dht_layout_t *)(long)tmp_layout;
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
+
+ layout = ctx->layout;
+ ctx->layout = NULL;
dht_layout_unref (this, layout);
+ GF_FREE (ctx);
return 0;
}
@@ -3922,16 +5093,22 @@ dht_forget (xlator_t *this, inode_t *inode)
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
conf = this->private;
if (!conf)
@@ -3993,7 +5170,12 @@ dht_notify (xlator_t *this, int event, void *data, ...)
if (conf->assert_no_child_down) {
gf_log (this->name, GF_LOG_WARNING,
"Received CHILD_DOWN. Exiting");
- exit(0);
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag,
+ GF_DEFRAG_STATUS_FAILED, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
}
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -4044,6 +5226,37 @@ dht_notify (xlator_t *this, int event, void *data, ...)
UNLOCK (&conf->subvolume_lock);
break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag,
+ GF_DEFRAG_STATUS_STOPPED, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
default:
propagate = 1;
break;
@@ -4059,9 +5272,12 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* if all subvols have reported status, no need to hide anything
or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
+ if (have_heard_from_all) {
propagate = 1;
+ }
+
+
if (!had_heard_from_all && have_heard_from_all) {
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
@@ -4080,6 +5296,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* continue to check other events for CHILD_UP */
}
}
+
+ /* rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ */
+ if (conf->defrag) {
+ ret = gf_thread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
}
ret = 0;
@@ -4088,3 +5317,24 @@ dht_notify (xlator_t *this, int event, void *data, ...)
return ret;
}
+
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 54cef6cd9..d391b87d5 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,6 +13,9 @@
#include "config.h"
#endif
+#include <regex.h>
+#include <signal.h>
+
#include "dht-mem-types.h"
#include "libxlator.h"
#include "syncop.h"
@@ -29,7 +23,7 @@
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout"
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
@@ -38,7 +32,8 @@
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
int ret);
@@ -61,20 +56,38 @@ struct dht_layout {
uint32_t start;
uint32_t stop;
xlator_t *xlator;
- } list[0];
+ } list[];
};
typedef struct dht_layout dht_layout_t;
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+};
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
+
typedef enum {
DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;
/* rebalance related */
struct dht_rebalance_ {
xlator_t *from_subvol;
xlator_t *target_node;
- int32_t wbflags;
off_t offset;
size_t size;
int32_t flags;
@@ -83,6 +96,7 @@ struct dht_rebalance_ {
struct iovec *vector;
struct iatt stbuf;
dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
};
struct dht_local {
@@ -129,7 +143,6 @@ struct dht_local {
struct {
uint32_t hole_cnt;
uint32_t overlaps_cnt;
- uint32_t missing;
uint32_t down;
uint32_t misc;
dht_selfheal_dir_cbk_t dir_cbk;
@@ -142,11 +155,16 @@ struct dht_local {
int32_t flags;
mode_t mode;
dev_t rdev;
+ mode_t umask;
/* need for file-info */
- char *pathinfo;
+ char *xattr_val;
char *key;
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
char *newpath;
/* gfid related */
@@ -162,7 +180,12 @@ struct dht_local {
glusterfs_fop_t fop;
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
};
typedef struct dht_local dht_local_t;
@@ -175,6 +198,59 @@ struct dht_du {
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ gf_defrag_pattern_list_t *defrag_pattern;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
gf_lock_t subvolume_lock;
int subvolume_cnt;
@@ -186,8 +262,8 @@ struct dht_conf {
gf_boolean_t search_unhashed;
int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
- uint32_t min_free_inodes;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
@@ -204,10 +280,28 @@ struct dht_conf {
/* Will be a global flag to control the layout spread count */
uint32_t dir_spread_cnt;
- struct syncenv *env; /* The env pointer to the rebalance synctask */
-
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *wild_xattr_name;
};
typedef struct dht_conf dht_conf_t;
@@ -222,29 +316,23 @@ struct dht_disk_layout {
};
typedef struct dht_disk_layout dht_disk_layout_t;
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
-
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
#define DHT_MIGRATION_IN_PROGRESS 1
#define DHT_MIGRATION_COMPLETED 2
-#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto"
-#define DHT_LINKFILE_MODE (S_ISVTX)
-
-#define check_is_linkfile(i,s,x) ( \
- ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- dict_get (x, DHT_LINKFILE_KEY))
-
-#define check_is_linkfile_wo_dict(i,s) ( \
- ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE))
+#define check_is_linkfile(i,s,x,n) (IS_DHT_LINKFILE_MODE (s) && dict_get (x, n))
#define IS_DHT_MIGRATION_PHASE2(buf) ( \
IA_ISREG ((buf)->ia_type) && \
@@ -263,6 +351,8 @@ typedef struct dht_disk_layout dht_disk_layout_t;
} \
} while (0)
+#define dht_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE)
+
#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
@@ -289,6 +379,25 @@ typedef struct dht_disk_layout dht_disk_layout_t;
dht_local_wipe (__xl, __local); \
} while (0)
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ int32_t sec = 0; \
+ sec = new_sec; \
+ LOCK (&inode->lock); \
+ { \
+ new_sec = max(new_sec, ctx_sec); \
+ if (sec < new_sec) \
+ new_nsec = ctx_nsec; \
+ if (sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
@@ -298,7 +407,7 @@ int dht_layout_normalize (xlator_t *this, l
int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
+ uint32_t *misc_p, uint32_t *no_space_p);
int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, loc_t *loc, dict_t *xattr);
@@ -314,7 +423,7 @@ int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
int pos, int32_t **disk_layout_p);
int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw);
+ int pos, void *disk_layout_raw, int disk_layout_len);
int dht_frame_return (call_frame_t *frame);
@@ -332,12 +441,14 @@ int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
@@ -352,17 +463,15 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
xlator_t *dht_first_up_subvol (xlator_t *this);
@@ -377,7 +486,8 @@ int dht_rename_cleanup (call_frame_t *frame)
int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
int dht_fix_directory_layout (call_frame_t *frame,
dht_selfheal_dir_cbk_t dir_cbk,
@@ -400,73 +510,73 @@ int32_t dht_lookup (call_frame_t *frame,
int32_t dht_stat (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t dht_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t dht_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset);
+ off_t offset, dict_t *xdata);
int32_t dht_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset);
+ off_t offset, dict_t *xdata);
int32_t dht_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t mask);
+ int32_t mask, dict_t *xdata);
int32_t dht_readlink (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- size_t size);
+ size_t size, dict_t *xdata);
-int32_t dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params);
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t dht_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params);
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
int32_t dht_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
int32_t dht_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
int32_t dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params);
+ mode_t umask, fd_t *fd, dict_t *params);
int32_t dht_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd,
- int32_t wbflags);
+ int32_t flags, fd_t *fd, dict_t *xdata);
int32_t dht_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset);
+ off_t offset, uint32_t flags, dict_t *xdata);
int32_t dht_writev (call_frame_t *frame,
xlator_t *this,
@@ -474,107 +584,121 @@ int32_t dht_writev (call_frame_t *frame,
struct iovec *vector,
int32_t count,
off_t offset,
- struct iobref *iobref);
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
int32_t dht_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t dht_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
int32_t dht_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t dht_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
int32_t dht_statfs (call_frame_t *frame,
xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t dht_setxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t dht_getxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
int32_t dht_fsetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t dht_fgetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name);
+ const char *name, dict_t *xdata);
int32_t dht_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
int32_t dht_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t dht_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- size_t size, off_t off);
+ size_t size, off_t off, dict_t *xdata);
int32_t dht_readdirp (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- size_t size, off_t off);
+ size_t size, off_t off, dict_t *dict);
int32_t dht_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict);
+ dict_t *dict, dict_t *xdata);
int32_t dht_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict);
+ dict_t *dict, dict_t *xdata);
int32_t dht_forget (xlator_t *this, inode_t *inode);
int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid);
-
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
/* definitions for nufa/switch */
@@ -597,12 +721,66 @@ int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent);
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+ dict_t *output);
+void*
+gf_defrag_start (void *this);
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 5453e3b10..fe3955ecb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -35,7 +26,8 @@
int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
dht_conf_t *conf = NULL;
call_frame_t *prev = NULL;
@@ -105,6 +97,7 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
conf = this->private;
pool = this->ctx->pool;
@@ -121,15 +114,14 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
goto err;
}
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
statfs_local->call_cnt = 1;
STACK_WIND (statfs_frame, dht_du_info_cbk,
conf->subvolumes[subvol_idx],
conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
+ &tmp_loc, NULL);
return 0;
err:
@@ -142,15 +134,21 @@ err:
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
conf = this->private;
gettimeofday (&tv, NULL);
+
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
if (tv.tv_sec > (conf->refresh_interval
+ conf->last_stat_fetch.tv_sec)) {
@@ -166,16 +164,12 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto err;
}
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
statfs_local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND (statfs_frame, dht_du_info_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
+ &tmp_loc, NULL);
}
conf->last_stat_fetch.tv_sec = tv.tv_sec;
@@ -254,50 +248,167 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
return is_subvol_filled;
}
+
+/*Get the best subvolume to create the file in*/
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
xlator_t *avail_subvol = NULL;
dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
conf = this->private;
-
- LOCK (&conf->subvolume_lock);
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
+
+ LOCK (&conf->subvolume_lock);
{
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
+ }
- }
- }
}
UNLOCK (&conf->subvolume_lock);
-
+out:
if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space and inodes to create");
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
}
- if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
- avail_subvol = subvol;
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
+}
- if (!avail_subvol)
- avail_subvol = subvol;
+static inline
+int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
- return avail_subvol;
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
+}
+
+
+/* Get subvol which has atleast one inode and maximum space */
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index c8ae74172..656cf23a0 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -37,6 +28,7 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
hash = gf_dm_hashfn (name, strlen (name));
break;
default:
@@ -52,30 +44,68 @@ dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ /*
+ * It wouldn't be safe to use alloca in an inline function that doesn't
+ * actually get inlined, and it wouldn't be efficient to do a real
+ * allocation, so we use alloca here (if needed) and pass that to the
+ * inline.
+ */
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_log (this->name, GF_LOG_TRACE, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 01d11ee68..18a501f04 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,6 +18,28 @@
#include "xlator.h"
#include "dht-common.h"
+static inline int
+dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+{
+ uint64_t tmp_subvol = 0;
+
+ tmp_subvol = (long)subvol;
+ return inode_ctx_set1 (inode, this, &tmp_subvol);
+}
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+{
+ int ret = -1;
+ uint64_t tmp_subvol = 0;
+
+ ret = inode_ctx_get1 (inode, this, &tmp_subvol);
+ if (tmp_subvol && subvol)
+ *subvol = (xlator_t *)tmp_subvol;
+
+ return ret;
+}
+
int
dht_frame_return (call_frame_t *frame)
@@ -49,6 +62,43 @@ dht_frame_return (call_frame_t *frame)
}
+static uint64_t
+dht_bits_for (uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
+
+ return bits;
+}
+
+/*
+ * A slightly "updated" version of the algorithm described in the commit log
+ * is used here.
+ *
+ * The only enhancement is that:
+ *
+ * - The number of bits used by the backend filesystem for HUGE d_off which
+ * is described as 63, and
+ * - The number of bits used by the d_off presented by the transformation
+ * upwards which is described as 64, are both made "configurable."
+ */
+
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+
int
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
{
@@ -56,6 +106,9 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
int cnt = 0;
int max = 0;
uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
if (x == ((uint64_t) -1)) {
y = (uint64_t) -1;
@@ -69,7 +122,23 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
max = conf->subvolume_cnt;
cnt = dht_subvol_cnt (this, subvol);
- y = ((x * max) + cnt);
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = dht_bits_for (max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
+ }
out:
if (y_p)
@@ -89,7 +158,7 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
int ret = 0; /* not found */
/* Why do other tasks if first required 'char' itself is not there */
- if (loc->name && !strchr (loc->name, '@'))
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
goto out;
trav = this->children;
@@ -129,10 +198,8 @@ dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
out:
if (!ret) {
/* !success */
- if (new_path)
- GF_FREE (new_path);
- if (new_name)
- GF_FREE (new_name);
+ GF_FREE (new_path);
+ GF_FREE (new_name);
}
return ret;
}
@@ -146,16 +213,38 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
int max = 0;
uint64_t x = 0;
xlator_t *subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
if (!this->private)
- goto out;
+ return -1;
conf = this->private;
max = conf->subvolume_cnt;
- cnt = y % max;
- x = y / max;
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = dht_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
+out:
subvol = conf->subvolumes[cnt];
if (subvol_p)
@@ -164,7 +253,6 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
if (x_p)
*x_p = x;
-out:
return 0;
}
@@ -215,21 +303,16 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
local->selfheal.layout = NULL;
}
- if (local->newpath) {
- GF_FREE (local->newpath);
- }
+ GF_FREE (local->newpath);
- if (local->key) {
- GF_FREE (local->key);
- }
+ GF_FREE (local->key);
- if (local->rebalance.vector)
- GF_FREE (local->rebalance.vector);
+ GF_FREE (local->rebalance.vector);
if (local->rebalance.iobref)
iobref_unref (local->rebalance.iobref);
- GF_FREE (local);
+ mem_put (local);
}
@@ -240,8 +323,7 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
inode_t *inode = NULL;
int ret = 0;
- /* TODO: use mem-pool */
- local = GF_CALLOC (1, sizeof (*local), gf_dht_mt_dht_local_t);
+ local = mem_get0 (THIS->local_pool);
if (!local)
goto out;
@@ -274,26 +356,12 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
out:
if (ret) {
if (local)
- GF_FREE (local);
+ mem_put (local);
local = NULL;
}
return local;
}
-
-char *
-basestr (const char *str)
-{
- char *basestr = NULL;
-
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
-
- return basestr;
-}
-
-
xlator_t *
dht_first_up_subvol (xlator_t *this)
{
@@ -358,11 +426,17 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
- if (is_fs_root (loc)) {
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
@@ -396,6 +470,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -437,7 +513,36 @@ out:
return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+out:
+ return next;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
@@ -466,6 +571,15 @@ out:
(a) = (b); \
} while (0)
+
+#define set_if_greater_time(a, an, b, bn) do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0) \
+
+
int
dht_iatt_merge (xlator_t *this, struct iatt *to,
struct iatt *from, xlator_t *subvol)
@@ -489,9 +603,12 @@ dht_iatt_merge (xlator_t *this, struct iatt *to,
set_if_greater (to->ia_uid, from->ia_uid);
set_if_greater (to->ia_gid, from->ia_gid);
- set_if_greater (to->ia_atime, from->ia_atime);
- set_if_greater (to->ia_mtime, from->ia_mtime);
- set_if_greater (to->ia_ctime, from->ia_ctime);
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
return 0;
}
@@ -617,36 +734,61 @@ dht_migration_complete_check_task (void *data)
call_frame_t *frame = NULL;
loc_t tmp_loc = {0,};
char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ uint64_t tmp_subvol = 0;
+ int open_failed = 0;
this = THIS;
frame = data;
local = frame->local;
+ conf = this->private;
src_node = local->cached_subvol;
- /* getxattr on cached_subvol for 'linkto' value */
- if (!local->loc.inode)
+ if (!local->loc.inode && !local->fd) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+
+ if (!local->loc.inode) {
ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
- else
+ conf->link_xattr_name);
+ } else {
+ SYNCTASK_SETID (0, 0);
ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ }
if (!ret)
dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
if (ret) {
- if ((errno != ENOENT) || (!local->loc.inode)) {
+ if (!dht_inode_missing(-ret) || (!local->loc.inode)) {
+ local->op_errno = -ret;
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
+ local->loc.path, strerror (-ret));
+ ret = -1;
goto out;
}
+
/* Need to do lookup on hashed subvol, then get the file */
ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
NULL);
- if (ret)
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
+ }
+
dst_node = dht_subvol_get_cached (this, local->loc.inode);
}
@@ -655,17 +797,21 @@ dht_migration_complete_check_task (void *data)
"%s: failed to get the destination node",
local->loc.path);
ret = -1;
+ local->op_errno = EINVAL;
goto out;
}
/* lookup on dst */
if (local->loc.inode) {
- ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+ ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL,
+ NULL);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to lookup the file on %s",
local->loc.path, dst_node->name);
+ local->op_errno = -ret;
+ ret = -1;
goto out;
}
@@ -674,6 +820,7 @@ dht_migration_complete_check_task (void *data)
"%s: gfid different on the target file on %s",
local->loc.path, dst_node->name);
ret = -1;
+ local->op_errno = EIO;
goto out;
}
}
@@ -681,15 +828,13 @@ dht_migration_complete_check_task (void *data)
/* update inode ctx (the layout) */
dht_layout_unref (this, local->layout);
- if (!local->loc.inode)
- ret = dht_layout_preset (this, dst_node, local->fd->inode);
- else
- ret = dht_layout_preset (this, dst_node, local->loc.inode);
+ ret = dht_layout_preset (this, dst_node, inode);
if (ret != 0) {
gf_log (this->name, GF_LOG_DEBUG,
"%s: could not set preset layout for subvol %s",
local->loc.path, dst_node->name);
ret = -1;
+ local->op_errno = EINVAL;
goto out;
}
@@ -699,60 +844,69 @@ dht_migration_complete_check_task (void *data)
"%s: no pre-set layout for subvolume %s",
local->loc.path, dst_node ? dst_node->name : "<nil>");
ret = -1;
+ local->op_errno = EINVAL;
goto out;
}
- if (!local->loc.inode)
- ret = dht_layout_set (this, local->fd->inode, layout);
- else
- ret = dht_layout_set (this, local->loc.inode, layout);
+ ret = dht_layout_set (this, inode, layout);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set the new layout",
local->loc.path);
+ local->op_errno = EINVAL;
goto out;
}
local->cached_subvol = dst_node;
ret = 0;
- if (!local->fd)
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
+ if (tmp_subvol)
goto out;
- /* once we detect the migration complete, the fd-ctx is no more
- required.. delete the ctx, and do one extra 'fd_unref' for open fd */
- ret = fd_ctx_del (local->fd, this, NULL);
- if (!ret) {
- fd_unref (local->fd);
- ret = 0;
+ if (list_empty (&inode->fd_list))
goto out;
- }
- /* if 'local->fd' (ie, fd based operation), send a 'open()' on
- destination if not already done */
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* perform 'open()' on all the fd's present on the inode */
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
-
+ (iter_fd->flags &
+ ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ local->op_errno = -ret;
+ ret = -1;
+ }
}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
goto out;
}
-
- /* need this unref for the fd on src_node */
- fd_unref (local->fd);
ret = 0;
out:
@@ -763,11 +917,8 @@ int
dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
- conf = this->private;
-
- ret = synctask_new (conf->env, dht_migration_complete_check_task,
+ ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
dht_migration_complete_check_done,
frame, frame);
return ret;
@@ -799,26 +950,41 @@ dht_rebalance_inprogress_task (void *data)
char *path = NULL;
struct iatt stbuf = {0,};
loc_t tmp_loc = {0,};
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ int open_failed = 0;
this = THIS;
frame = data;
local = frame->local;
+ conf = this->private;
src_node = local->cached_subvol;
- /* getxattr on cached_subvol for 'linkto' value */
- if (local->loc.inode)
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID (0, 0);
ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
- else
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ } else {
ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
+ conf->link_xattr_name);
+ }
- if (ret) {
+ if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
- goto out;
+ local->loc.path, strerror (-ret));
+ ret = -1;
+ goto out;
}
dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
@@ -840,6 +1006,7 @@ dht_rebalance_inprogress_task (void *data)
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to lookup the file on %s",
local->loc.path, dst_node->name);
+ ret = -1;
goto out;
}
@@ -854,34 +1021,51 @@ dht_rebalance_inprogress_task (void *data)
ret = 0;
- if (!local->fd)
- goto out;
+ if (list_empty (&inode->fd_list))
+ goto done;
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID (0, 0);
+
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ (iter_fd->flags &
+ ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to send open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ ret = -1;
+ open_failed = 1;
+ }
}
+ GF_FREE (path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
goto out;
}
- ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node);
+done:
+ ret = dht_inode_ctx_set1 (this, inode, dst_node);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set fd-ctx target file at %s",
+ "%s: failed to set inode-ctx target file at %s",
local->loc.path, dst_node->name);
goto out;
}
@@ -896,12 +1080,99 @@ dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
- conf = this->private;
-
- ret = synctask_new (conf->env, dht_rebalance_inprogress_task,
+ ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
dht_inprogress_check_done,
frame, frame);
return ret;
}
+
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+
+ return ret;
+}
+
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, stat, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+ stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+ stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+ stat->ia_atime, stat->ia_atime_nsec, inode, post);
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+out:
+ return 0;
+}
+
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = inode_ctx_get (inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *) ctx_int;
+out:
+ return ret;
+}
+
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set (inode, this, &ctx_int);
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 7352e47bd..e8a9a7196 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -34,7 +25,7 @@ int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
int
dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -44,7 +35,7 @@ dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
prev->this->name, strerror (op_errno));
@@ -61,7 +52,7 @@ dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
out:
- DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd);
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
return 0;
}
@@ -86,17 +77,17 @@ dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
&local->loc, local->rebalance.flags, local->fd,
- local->rebalance.wbflags);
+ NULL);
return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, int wbflags)
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -120,30 +111,30 @@ dht_open (call_frame_t *frame, xlator_t *this,
goto err;
}
- local->rebalance.wbflags = wbflags;
local->rebalance.flags = flags;
local->call_cnt = 1;
STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
{
- uint64_t tmp_subvol = 0;
+ xlator_t *subvol = 0;
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ inode_t *inode = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -153,7 +144,7 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
@@ -164,26 +155,28 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
+ local->op_errno = op_errno;
/* Check if the rebalance phase2 is true */
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (ret) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
/* Phase 2 of migration */
local->rebalance.target_op_fn = dht_attr2;
ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
} else {
/* value is already set in fd_ctx, that means no need
to check for whether its complete or not. */
dht_attr2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
}
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
err:
return 0;
}
@@ -208,21 +201,21 @@ dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_FSTAT) {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd);
+ subvol->fops->fstat, local->fd, NULL);
} else {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc);
+ subvol->fops->stat, &local->loc, NULL);
}
return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf)
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -257,14 +250,14 @@ out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ &local->stbuf, xdata);
}
err:
return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -300,7 +293,7 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, loc);
+ subvol->fops->stat, loc, xdata);
return 0;
}
@@ -312,21 +305,21 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
STACK_WIND (frame, dht_attr_cbk,
subvol, subvol->fops->stat,
- loc);
+ loc, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -360,7 +353,7 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, fd);
+ subvol->fops->fstat, fd, xdata);
return 0;
}
@@ -371,14 +364,14 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
subvol = layout->list[i].xlator;
STACK_WIND (frame, dht_attr_cbk,
subvol, subvol->fops->fstat,
- fd);
+ fd, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -387,10 +380,12 @@ int
dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = 0;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
if (!local) {
@@ -403,28 +398,30 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
- if ((op_ret == -1) && (op_errno != ENOENT))
+ if ((op_ret == -1) && !dht_inode_missing(op_errno))
goto out;
+ local->op_errno = op_errno;
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
/* File would be migrated to other node */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
local->rebalance.target_op_fn = dht_readv2;
ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
} else {
/* value is already set in fd_ctx, that means no need
to check for whether its complete or not. */
dht_readv2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
}
out:
DHT_STRIP_PHASE1_FLAGS (stbuf);
DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+ iobref, xdata);
return 0;
}
@@ -448,18 +445,19 @@ dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size, local->rebalance.offset);
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
return 0;
out:
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
int
dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -485,35 +483,57 @@ dht_readv (call_frame_t *frame, xlator_t *this,
local->rebalance.offset = off;
local->rebalance.size = size;
+ local->rebalance.flags = flags;
local->call_cnt = 1;
STACK_WIND (frame, dht_readv_cbk,
subvol, subvol->fops->readv,
- fd, size, off);
+ fd, size, off, flags, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
int
dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
int ret = -1;
dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
local = frame->local;
+ prev = cookie;
+ if (!prev || !prev->this)
+ goto out;
if (local->call_cnt != 1)
goto out;
+ if ((op_ret == -1) && (op_errno == ENOTCONN) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
- if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && dht_inode_missing(op_errno)) {
/* File would be migrated to other node */
+ local->op_errno = op_errno;
local->rebalance.target_op_fn = dht_access2;
ret = dht_rebalance_complete_check (frame->this, frame);
if (!ret)
@@ -521,7 +541,7 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
out:
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -544,18 +564,19 @@ dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags);
+ &local->loc, local->rebalance.flags, NULL);
return 0;
out:
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -584,13 +605,13 @@ dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
}
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno);
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
return 0;
}
@@ -598,10 +619,11 @@ err:
int
dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
@@ -611,14 +633,14 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
/* If context is set, then send flush() it to the destination */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
dht_flush2 (this, frame, 0);
return 0;
}
out:
- DHT_STACK_UNWIND (flush, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -628,14 +650,10 @@ dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -643,20 +661,19 @@ dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
local->call_cnt = 2; /* This is the second attempt */
STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, local->fd);
+ subvol, subvol->fops->flush, local->fd, NULL);
return 0;
}
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -678,13 +695,13 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->call_cnt = 1;
STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd);
+ subvol, subvol->fops->flush, fd, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno);
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -692,17 +709,20 @@ err:
int
dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf)
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
local = frame->local;
prev = cookie;
local->op_errno = op_errno;
- if (op_ret == -1) {
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
prev->this->name, strerror (op_errno));
@@ -717,8 +737,9 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
+ local->op_errno = op_errno;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
local->rebalance.target_op_fn = dht_fsync2;
/* Check if the rebalance phase1 is true */
@@ -733,17 +754,18 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
}
+ if (!ret)
+ return 0;
} else {
dht_fsync2 (this, frame, 0);
- }
- if (!ret)
return 0;
+ }
out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
@@ -753,34 +775,29 @@ dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
-
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
local->call_cnt = 2; /* This is the second attempt */
STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags);
+ local->fd, local->rebalance.flags, NULL);
return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -798,13 +815,13 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
subvol = local->cached_subvol;
STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -815,9 +832,9 @@ err:
phase 2 of migration */
int
dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock)
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock);
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
return 0;
}
@@ -825,7 +842,7 @@ dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock)
+ fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -845,13 +862,13 @@ dht_lk (call_frame_t *frame, xlator_t *this,
/* TODO: for rebalance, we need to preserve the fop arguments */
STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
- cmd, flock);
+ cmd, flock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -859,7 +876,8 @@ err:
/* Symlinks are currently not migrated, so no need for any check here */
int
dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path, struct iatt *stbuf)
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -874,14 +892,15 @@ dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
err:
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf);
+ DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
return 0;
}
int
-dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -909,13 +928,13 @@ dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
STACK_WIND (frame, dht_readlink_cbk,
subvol, subvol->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -926,16 +945,16 @@ err:
int
dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -966,13 +985,13 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame,
dht_xattrop_cbk,
subvol, subvol->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -980,16 +999,16 @@ err:
int
dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict);
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1009,13 +1028,13 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame,
dht_fxattrop_cbk,
subvol, subvol->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1023,17 +1042,17 @@ err:
int
dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1065,31 +1084,31 @@ dht_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame,
dht_inodelk_cbk,
subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
+ volume, loc, cmd, lock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -1107,16 +1126,14 @@ dht_finodelk (call_frame_t *frame, xlator_t *this,
}
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
+ STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno);
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 4f65b4671..363bff3bf 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -28,16 +19,20 @@
int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
int
dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
int ret = -1;
+ xlator_t *subvol = NULL;
- if (op_ret == -1) {
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
goto out;
}
@@ -59,6 +54,7 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->rebalance.target_op_fn = dht_writev2;
+ local->op_errno = op_errno;
/* Phase 2 of migration */
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
@@ -71,8 +67,8 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
dht_writev2 (this, frame, 0);
return 0;
}
@@ -85,7 +81,8 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -95,14 +92,10 @@ dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
local = frame->local;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -112,15 +105,16 @@ dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
STACK_WIND (frame, dht_writev_cbk,
subvol, subvol->fops->writev,
local->fd, local->rebalance.vector, local->rebalance.count,
- local->rebalance.offset, local->rebalance.iobref);
+ local->rebalance.offset, local->rebalance.flags,
+ local->rebalance.iobref, NULL);
return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -149,18 +143,19 @@ dht_writev (call_frame_t *frame, xlator_t *this,
local->rebalance.vector = iov_dup (vector, count);
local->rebalance.offset = off;
local->rebalance.count = count;
+ local->rebalance.flags = flags;
local->rebalance.iobref = iobref_ref (iobref);
local->call_cnt = 1;
STACK_WIND (frame, dht_writev_cbk,
subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
+ fd, vector, count, off, flags, iobref, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -170,11 +165,13 @@ err:
int
dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -184,7 +181,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
gf_log (this->name, GF_LOG_DEBUG,
@@ -204,6 +201,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->rebalance.target_op_fn = dht_truncate2;
+ local->op_errno = op_errno;
/* Phase 2 of migration */
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
@@ -215,8 +213,9 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
dht_truncate2 (this, frame, 0);
return 0;
}
@@ -229,7 +228,7 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
err:
return 0;
}
@@ -240,16 +239,13 @@ dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ inode_t *inode = NULL;
local = frame->local;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ inode = local->fd ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -258,18 +254,19 @@ dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_TRUNCATE) {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->truncate, &local->loc,
- local->rebalance.offset);
+ local->rebalance.offset, NULL);
} else {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->ftruncate, local->fd,
- local->rebalance.offset);
+ local->rebalance.offset, NULL);
}
return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -299,19 +296,20 @@ dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
-dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
xlator_t *subvol = NULL;
int op_errno = -1;
@@ -339,22 +337,423 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_fallocate2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_fallocate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
+ local->fd, local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, NULL);
+
+ return 0;
+}
+
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_fallocate_cbk,
+ subvol, subvol->fops->fallocate,
+ fd, mode, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_discard2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_discard2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ fd, offset, len, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_zerofill2;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_zerofill2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
/* handle cases of migration here for 'setattr()' calls */
int
dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -364,7 +763,7 @@ dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
gf_log (this->name, GF_LOG_DEBUG,
"subvolume %s returned -1 (%s)",
prev->this->name, strerror (op_errno));
@@ -391,7 +790,7 @@ out:
DHT_STRIP_PHASE1_FLAGS (postbuf);
DHT_STRIP_PHASE1_FLAGS (prebuf);
DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
@@ -401,15 +800,13 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
{
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ inode_t *inode = NULL;
local = frame->local;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
if (!subvol)
subvol = local->cached_subvol;
@@ -419,11 +816,13 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
if (local->fop == GF_FOP_SETATTR) {
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->setattr, &local->loc,
- &local->rebalance.stbuf, local->rebalance.flags);
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
} else {
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->fsetattr, local->fd,
- &local->rebalance.stbuf, local->rebalance.flags);
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
}
return 0;
@@ -434,7 +833,7 @@ dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
int
dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -465,7 +864,7 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf);
+ &local->prebuf, &local->stbuf, xdata);
return 0;
}
@@ -473,7 +872,7 @@ unlock:
int
dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_layout_t *layout = NULL;
@@ -519,7 +918,7 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
@@ -530,14 +929,14 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, dht_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -545,7 +944,7 @@ err:
int
dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
xlator_t *subvol = NULL;
dht_layout_t *layout = NULL;
@@ -590,7 +989,7 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
@@ -601,14 +1000,14 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
STACK_WIND (frame, dht_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 33dcc4067..e1a37b77c 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -34,14 +25,12 @@
#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
-
dht_layout_t *
dht_layout_new (xlator_t *this, int cnt)
{
dht_layout_t *layout = NULL;
dht_conf_t *conf = NULL;
-
conf = this->private;
layout = GF_CALLOC (1, layout_size (cnt),
@@ -59,6 +48,7 @@ dht_layout_new (xlator_t *this, int cnt)
}
layout->ref = 1;
+
out:
return layout;
}
@@ -68,9 +58,7 @@ dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
dht_layout_t *layout = NULL;
- int ret = -1;
conf = this->private;
if (!conf)
@@ -78,9 +66,8 @@ dht_layout_get (xlator_t *this, inode_t *inode)
LOCK (&conf->layout_lock);
{
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
+ dht_inode_ctx_layout_get (inode, this, &layout);
+ if (layout) {
layout->ref++;
}
}
@@ -98,7 +85,6 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
int oldret = -1;
int ret = 0;
dht_layout_t *old_layout;
- uint64_t old_layout_int;
conf = this->private;
if (!conf)
@@ -106,16 +92,13 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
LOCK (&conf->layout_lock);
{
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
+ if (!oldret) {
dht_layout_unref (this, old_layout);
}
@@ -130,7 +113,7 @@ dht_layout_unref (xlator_t *this, dht_layout_t *layout)
dht_conf_t *conf = NULL;
int ref = 0;
- if (layout->preset || !this->private)
+ if (!layout || layout->preset || !this->private)
return;
conf = this->private;
@@ -174,9 +157,9 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
int ret = 0;
- ret = dht_hash_compute (layout->type, name, &hash);
+ ret = dht_hash_compute (this, layout->type, name, &hash);
if (ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"hash computation failed for type=%d name=%s",
layout->type, name);
goto out;
@@ -191,7 +174,7 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"no subvolume for hash (value) = %u", hash);
}
@@ -280,6 +263,9 @@ dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
if (disk_layout_p)
*disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
+
ret = 0;
out:
@@ -289,7 +275,7 @@ out:
int
dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw)
+ int pos, void *disk_layout_raw, int disk_layout_len)
{
int cnt = 0;
int type = 0;
@@ -297,19 +283,38 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
int stop_off = 0;
int disk_layout[4];
- /* TODO: assert disk_layout_ptr is of required length */
+ if (!disk_layout_raw) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "error no layout on disk for merge");
+ return -1;
+ }
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ GF_ASSERT (disk_layout_len == sizeof (disk_layout));
+
+ memcpy (disk_layout, disk_layout_raw, disk_layout_len);
cnt = ntoh32 (disk_layout[0]);
if (cnt != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"disk layout has invalid count %d", cnt);
return -1;
}
- /* TODO: assert type is compatible */
- type = ntoh32 (disk_layout[1]);
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Catastrophic error layout with unknown type found %d",
+ disk_layout[1]);
+ return -1;
+ }
+
start_off = ntoh32 (disk_layout[2]);
stop_off = ntoh32 (disk_layout[3]);
@@ -329,11 +334,12 @@ int
dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
-
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
if (op_ret != 0) {
err = op_errno;
@@ -354,12 +360,12 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (xattr) {
/* during lookup and not mkdir */
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw);
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
+ &disk_layout_raw, &disk_layout_len);
}
if (ret != 0) {
- layout->list[i].err = -1;
+ layout->list[i].err = 0;
gf_log (this->name, GF_LOG_TRACE,
"missing disk layout on %s. err = %d",
subvol->name, err);
@@ -367,9 +373,10 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
goto out;
}
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw);
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
+ disk_layout_len);
if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"layout merge from subvolume %s failed",
subvol->name);
goto out;
@@ -405,6 +412,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
layout->list[j].err = err_swap;
}
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
@@ -412,17 +435,39 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
layout->list[j].xlator->name));
}
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* Check if xlator is already part of layout, and layout is
+ * non-zero. */
+ if (!strcmp (layout->list[i].xlator->name, xlator->name)) {
+ if (layout->list[i].start != layout->list[i].stop)
+ return _gf_true;
+ break;
+ }
+ }
+ return _gf_false;
+}
+
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
+ diff = (int64_t) layout->list[i].start
+ - (int64_t) layout->list[j].start;
+out:
return diff;
}
@@ -471,7 +516,8 @@ dht_layout_sort_volname (dht_layout_t *layout)
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
uint32_t overlaps = 0;
uint32_t missing = 0;
@@ -484,30 +530,51 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t prev_stop = 0;
uint32_t last_stop = 0;
char is_virgin = 1;
-
- /* TODO: explain what is happening */
-
+ uint32_t no_space = 0;
+
+ /* This funtion scans through the layout spread of a directory to
+ check if there are any anomalies. Prior to calling this function
+ the layout entries should be sorted in the ascending order.
+
+ If the layout entry has err != 0
+ then increment the corresponding anomaly.
+ else
+ if (start of the current layout entry > stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates a hole in the layout
+ if (start of the current layout entry < stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates an overlap in the layout
+ */
last_stop = layout->list[0].start - 1;
prev_stop = last_stop;
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ case ESTALE:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
continue;
- }
+ }
is_virgin = 0;
@@ -540,6 +607,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
if (misc_p)
*misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
+
return ret;
}
@@ -555,7 +625,6 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
uint32_t down = 0;
uint32_t misc = 0;
-
ret = dht_layout_sort (layout);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
@@ -565,7 +634,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_anomalies (this, loc, layout,
&holes, &overlaps,
- &missing, &down, &misc);
+ &missing, &down, &misc, NULL);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"error while finding anomalies in %s -- not good news",
@@ -583,43 +652,56 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
"found anomalies in %s. holes=%d overlaps=%d",
loc->path, holes, overlaps);
}
- ret = 1;
+ ret = -1;
}
for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
*/
if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "path=%s err=%s on subvol=%s",
+ loc->path,
+ strerror (layout->list[i].err),
+ (layout->list[i].xlator ?
+ layout->list[i].xlator->name
+ : "<>"));
+ if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
+ ret++;
+ }
}
}
+
out:
return ret;
}
+int
+dht_dir_has_layout (dict_t *xattr, char *name)
+{
+
+ void *disk_layout_raw = NULL;
+
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
int
dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ dht_conf_t *conf = this->private;
for (idx = 0; idx < layout->cnt; idx++) {
@@ -649,11 +731,11 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
goto out;
}
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
&disk_layout_raw);
if (dict_ret < 0) {
- if (err == 0) {
+ if (err == 0 && layout->list[pos].stop) {
gf_log (this->name, GF_LOG_INFO,
"%s - disk layout missing", loc->path);
ret = -1;
@@ -665,7 +747,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
count = ntoh32 (disk_layout[0]);
if (count != 1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_ERROR,
"%s - disk layout has invalid count %d",
loc->path, count);
ret = -1;
@@ -714,7 +796,7 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
LOCK (&conf->layout_lock);
{
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ dht_inode_ctx_layout_set (inode, this, layout);
}
UNLOCK (&conf->layout_lock);
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 2186b064a..dbc9d0b3c 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,37 +19,106 @@
#include "compat.h"
#include "dht-common.h"
+int
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_log (this->name, GF_LOG_WARNING, "got non-linkfile %s:%s",
+ prev->this->name, local->loc.path);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
+}
+#define is_equal(a, b) (a == b)
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
local = frame->local;
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set linkto key");
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this,
xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
dht_local_t *local = NULL;
dict_t *dict = NULL;
int need_unref = 0;
int ret = 0;
+ dht_conf_t *conf = this->private;
local = frame->local;
local->linkfile.linkfile_cbk = linkfile_cbk;
local->linkfile.srcvol = tovol;
+ local->linked = _gf_false;
+
dict = local->params;
if (!dict) {
dict = dict_new ();
@@ -74,8 +134,12 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
"%s: gfid set failed", loc->path);
}
- ret = dict_set_str (dict, "trusted.glusterfs.dht.linkto",
- tovol->name);
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: internal-fop set failed", loc->path);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
gf_log (frame->this->name, GF_LOG_INFO,
@@ -84,9 +148,13 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
goto out;
}
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_linkfile_create_cbk,
fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, dict);
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
if (need_unref && dict)
dict_unref (dict);
@@ -94,7 +162,7 @@ dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
return 0;
out:
local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL);
+ loc->inode, NULL, NULL, NULL, NULL);
if (need_unref && dict)
dict_unref (dict);
@@ -106,7 +174,8 @@ out:
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -150,7 +219,7 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
subvol, subvol->fops->unlink,
- &unlink_local->loc);
+ &unlink_local->loc, 0, NULL);
return 0;
err:
@@ -175,7 +244,7 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
if (!xattr)
goto out;
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
if ((-1 == ret) || !volname)
goto out;
@@ -190,3 +259,70 @@ dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
out:
return subvol;
}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_log (this->name, GF_LOG_ERROR, "setattr of uid/gid on %s"
+ " :<gfid:%s> failed (%s)",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid), strerror(op_errno));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 21fb5a7ca..e893eb48f 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -28,7 +19,6 @@ enum gf_dht_mem_types_ {
gf_dht_mt_dht_conf_t,
gf_dht_mt_char,
gf_dht_mt_int32_t,
- gf_dht_mt_dht_local_t,
gf_dht_mt_xlator_t,
gf_dht_mt_dht_layout_t,
gf_switch_mt_dht_conf_t,
@@ -37,6 +27,9 @@ enum gf_dht_mem_types_ {
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 70950e83e..4f78f5203 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,11 +15,13 @@
#endif
#include "dht-common.h"
+#include "xlator.h"
+#include <signal.h>
+#include <fnmatch.h>
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
#define DHT_REBALANCE_BLKSIZE (128 * 1024)
-#define DHT_MIGRATE_EVEN_IF_LINK_EXISTS 1
static int
dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
@@ -60,12 +53,13 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
ret = syncop_write (to, fd, (buf + tmp_offset),
(start_idx - tmp_offset),
(offset + tmp_offset),
- iobref);
+ iobref, 0);
/* 'path' will be logged in calling function */
if (ret < 0) {
gf_log (THIS->name, GF_LOG_WARNING,
"failed to write (%s)",
- strerror (errno));
+ strerror (-ret));
+ ret = -1;
goto out;
}
@@ -78,12 +72,13 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
/* This means, last chunk is not yet written.. write it */
ret = syncop_write (to, fd, (buf + tmp_offset),
(buf_len - tmp_offset),
- (offset + tmp_offset), iobref);
+ (offset + tmp_offset), iobref, 0);
if (ret < 0) {
/* 'path' will be logged in calling function */
gf_log (THIS->name, GF_LOG_WARNING,
"failed to write (%s)",
- strerror (errno));
+ strerror (-ret));
+ ret = -1;
goto out;
}
}
@@ -99,8 +94,164 @@ out:
}
+/*
+ return values:
+ -1 : failure
+ -2 : success
+
+Hard link migration is carried out in three stages.
+
+(Say there are n hardlinks)
+Stage 1: Setting the new hashed subvol information on the 1st hardlink
+ encountered (linkto setxattr)
+
+Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th
+ hardlink
+
+Stage 3: Physical migration of the data file for nth hardlink
+
+Why to deem "-2" as success and not "0":
+
+ dht_migrate_file expects return value "0" from _is_file_migratable if
+the file has to be migrated.
+
+ _is_file_migratable returns zero only when it is called with the
+flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS".
+
+ gf_defrag_handle_hardlink calls dht_migrate_file for physical migration
+of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS"
+
+Hence, gf_defrag_handle_hardlink returning "0" for success will force
+"dht_migrate_file" to migrate each of the hardlink which is not intended.
+
+For each of the three stage mentioned above "-2" will be returned and will
+be converted to "0" in dht_migrate_file.
+
+*/
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ ret = -2;
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = -2;
+out:
+ return ret;
+}
+
+/*
+ return values
+ 0 : File will be migrated
+ -2 : File will not be migrated
+ (This is the return value from gf_defrag_handle_hardlink. Checkout
+ gf_defrag_handle_hardlink for description of "returning -2")
+ -1 : failure
+*/
static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
{
int ret = -1;
@@ -111,11 +262,30 @@ __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf)
goto out;
}
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ goto out;
+ }
if (stbuf->ia_nlink > 1) {
- /* TODO : support migrating hardlinks */
- gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks",
- loc->path);
- ret = -ENOTSUP;
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+
+ /*
+ Returning zero will force the file to be remigrated.
+ Checkout gf_defrag_handle_hardlink for more information.
+ */
+ if (ret && ret != -2) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ ret = -ENOTSUP;
+ }
goto out;
}
@@ -127,14 +297,16 @@ out:
static inline int
__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd)
+ dict_t *dict, fd_t **dst_fd, dict_t *xattr)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ dht_conf_t *conf = NULL;
this = THIS;
+ conf = this->private;
ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
if (ret) {
@@ -143,7 +315,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
goto out;
}
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name);
+ ret = dict_set_str (dict, conf->link_xattr_name, from->name);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set gfid in dict for create", loc->path);
@@ -170,25 +342,47 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
goto out;
}
}
- if ((ret == -1) && (errno != ENOENT)) {
+ if ((ret < 0) && (-ret != ENOENT)) {
/* File exists in destination, but not accessible */
gf_log (THIS->name, GF_LOG_WARNING,
"%s: failed to lookup file (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
/* Create the destination with LINKFILE mode, and linkto xattr,
if the linkfile already exists, it will just open the file */
ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
- dict);
+ dict, &new_stbuf);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to create %s on %s (%s)",
- loc->path, to->name, strerror (errno));
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
goto out;
}
+ ret = syncop_fsetxattr (to, fd, xattr, 0);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed for %s on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+
if (dst_fd)
*dst_fd = fd;
@@ -201,20 +395,24 @@ out:
static inline int
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf)
+ struct iatt *stbuf, int flag)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
int ret = -1;
xlator_t *this = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+
this = THIS;
ret = syncop_statfs (from, loc, &src_statfs);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to get statfs of %s on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -222,21 +420,51 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to get statfs of %s on %s (%s)",
- loc->path, to->name, strerror (errno));
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
goto out;
}
- if (((dst_statfs.f_bavail *
- dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) >
- (((src_statfs.f_bavail * src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) {
- gf_log (this->name, GF_LOG_WARNING,
- "data movement attempted from node (%s) with"
- " higher disk space to a node (%s) with "
- "lesser disk space (%s)", from->name,
- to->name, loc->path);
- /* this is not a 'failure', but we don't want to
- consider this as 'success' too :-/ */
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid errorneous move to destination where
+ the space could be scantily available.
+ */
+ if (stbuf) {
+ dst_statfs_blocks = ((dst_statfs.f_bavail *
+ dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ src_statfs_blocks = ((src_statfs.f_bavail *
+ src_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ if ((dst_statfs_blocks - stbuf->ia_blocks) <
+ (src_statfs_blocks + stbuf->ia_blocks)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "data movement attempted from node (%s) with"
+ " higher disk space to a node (%s) with "
+ "lesser disk space (%s)", from->name,
+ to->name, loc->path);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ ret = 1;
+ goto out;
+ }
+ }
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
ret = 1;
goto out;
}
@@ -263,7 +491,7 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
DHT_REBALANCE_BLKSIZE : (ia_size - total));
ret = syncop_readv (from, src, read_size,
- offset, &vector, &count, &iobref);
+ offset, 0, &vector, &count, &iobref);
if (!ret || (ret < 0)) {
break;
}
@@ -273,15 +501,14 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
ret, offset, iobref);
else
ret = syncop_writev (to, dst, vector, count,
- offset, iobref);
+ offset, iobref, 0);
if (ret < 0) {
break;
}
offset += ret;
total += ret;
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (iobref)
iobref_unref (iobref);
iobref = NULL;
@@ -289,11 +516,12 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
}
if (iobref)
iobref_unref (iobref);
- if (vector)
- GF_FREE (vector);
+ GF_FREE (vector);
if (ret >= 0)
ret = 0;
+ else
+ ret = -1;
return ret;
}
@@ -308,8 +536,10 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
dict_t *dict = NULL;
xlator_t *this = NULL;
struct iatt iatt = {0,};
+ dht_conf_t *conf = NULL;
this = THIS;
+ conf = this->private;
fd = fd_create (loc->inode, DHT_REBALANCE_PID);
if (!fd) {
@@ -320,10 +550,11 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
}
ret = syncop_open (from, loc, O_RDWR, fd);
- if (ret == -1) {
+ if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to open file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -332,7 +563,7 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
if (!dict)
goto out;
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
+ ret = dict_set_str (dict, conf->link_xattr_name, to->name);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to set xattr in dict for %s (linkto:%s)",
@@ -346,7 +577,8 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to set xattr on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -360,7 +592,8 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"failed to set mode on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -385,12 +618,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
dict_t *dict = NULL;
char *link = NULL;
struct iatt stbuf = {0,};
+ dht_conf_t *conf = this->private;
dict = dict_new ();
if (!dict)
goto out;
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to set 'linkto' key in dict", loc->path);
@@ -399,19 +633,21 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
/* check in the destination if the file is link file */
ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
- if ((ret == -1) && (errno != ENOENT)) {
+ if ((ret < 0) && (-ret != ENOENT)) {
gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
/* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ dict_del (dict, conf->link_xattr_name);
/* file exists in target node, only if it is 'linkfile' its valid,
otherwise, error out */
if (!ret) {
- if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) {
+ if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
gf_log (this->name, GF_LOG_WARNING,
"%s: file exists in destination", loc->path);
ret = -1;
@@ -423,7 +659,8 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to delete the linkfile (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
}
@@ -443,15 +680,17 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"%s: readlink on symlink failed (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
- ret = syncop_symlink (to, loc, link, dict);
+ ret = syncop_symlink (to, loc, link, dict, 0);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: creating symlink failed (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -461,18 +700,31 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
buf->ia_type),
makedev (ia_major (buf->ia_rdev),
- ia_minor (buf->ia_rdev)), dict);
+ ia_minor (buf->ia_rdev)), dict, 0);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
goto out;
}
done:
+ ret = syncop_setattr (to, loc, buf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
+
ret = syncop_unlink (from, loc);
- if (ret)
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
+ }
out:
if (dict)
@@ -504,7 +756,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
fd_t *dst_fd = NULL;
dict_t *dict = NULL;
dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -513,22 +767,35 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (!dict)
goto out;
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
/* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, NULL, &stbuf, NULL, NULL);
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
/* preserve source mode, so set the same to the destination */
src_ia_prot = stbuf.ia_prot;
/* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf);
- if (ret)
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
+ if (ret) {
+ if (ret == -2)
+ ret = 0;
goto out;
-
+ }
/* Take care of the special files */
if (!IA_ISREG (stbuf.ia_type)) {
/* Special files */
@@ -536,18 +803,24 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ }
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
- dict, &dst_fd);
+ dict, &dst_fd, xattr);
if (ret)
goto out;
- /* Should happen on all files when 'force' option is not given */
- if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) {
- ret = __dht_check_free_space (to, from, loc, &stbuf);
- if (ret) {
- goto out;
- }
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
+ goto out;
}
/* Open the source, and also update mode/xattr */
@@ -558,10 +831,12 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
ret = syncop_fstat (from, src_fd, &stbuf);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -580,33 +855,22 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
}
ret = -1;
goto out;
}
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s (%s)",
- loc->path, from->name, strerror (errno));
-
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s (%s)",
- loc->path, to->name, strerror (errno));
-
/* TODO: Sync the locks */
- ret = syncop_fsync (to, dst_fd);
- if (ret)
+ ret = syncop_fsync (to, dst_fd, 0);
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to fsync on %s (%s)",
- loc->path, to->name, strerror (errno));
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ }
/* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
@@ -616,7 +880,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* Failed to get the stat info */
gf_log (this->name, GF_LOG_ERROR,
"failed to fstat file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
goto out;
}
@@ -638,7 +903,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
+ goto out;
}
/* Because 'futimes' is not portable */
@@ -648,7 +915,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
}
/* Make the source as a linkfile first before deleting it */
@@ -658,7 +926,28 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret) {
gf_log (this->name, GF_LOG_WARNING, \
"%s: failed to perform setattr on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+ ret = -1;
}
/* Do a stat and check the gfid before unlink */
@@ -666,41 +955,29 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to do a stat on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto out;
}
- if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) {
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
/* take out the source from namespace */
ret = syncop_unlink (from, loc);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to perform unlink on %s (%s)",
- loc->path, from->name, strerror (errno));
+ loc->path, from->name, strerror (-ret));
+ ret = -1;
+ goto out;
}
}
- /* Free up the data blocks on the source node, as the whole
- file is migrated */
- ret = syncop_ftruncate (from, src_fd, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform truncate on %s (%s)",
- loc->path, from->name, strerror (errno));
- }
-
- /* remove the 'linkto' xattr from the destination */
- ret = syncop_removexattr (to, loc, DHT_LINKFILE_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform removexattr on %s (%s)",
- loc->path, to->name, strerror (errno));
- }
-
ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: failed to lookup the file on subvolumes (%s)",
- loc->path, strerror (errno));
+ loc->path, strerror (-ret));
+ ret = -1;
}
gf_log (this->name, GF_LOG_INFO,
@@ -714,6 +991,8 @@ out:
if (xattr)
dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
if (dst_fd)
syncop_close (dst_fd);
@@ -788,7 +1067,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
op_errno = EPERM;
}
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno);
+ DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
return 0;
}
@@ -796,12 +1075,834 @@ int
dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
{
int ret = -1;
- dht_conf_t *conf = NULL;
- conf = this->private;
-
- ret = synctask_new (conf->env, rebalance_task,
+ ret = synctask_new (this->ctx->env, rebalance_task,
rebalance_task_completion,
frame, frame);
return ret;
}
+
+int
+gf_listener_stop (xlator_t *this)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((op_errno != ENOSPC) || (op_errno != ENOTCONN))
+ return 1;
+
+ if (op_errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (op_errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+ out:
+ return ret;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
+ struct timeval dir_start = {0,};
+ struct timeval end = {0,};
+ double elapsed = {0,};
+ struct timeval start = {0,};
+ int32_t err = 0;
+ int loglevel = GF_LOG_TRACE;
+
+ gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+ loc->path);
+ gettimeofday (&dir_start, NULL);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0) {
+
+ if (ret < 0) {
+
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
+ " Aborting migrate-data",
+ strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&start, NULL);
+ }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match (defrag, entry->d_name,
+ entry->d_stat.ia_size)
+ == _gf_false)) {
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ ret = -1;
+ continue;
+ }
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_NODE_UUID_KEY);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid for %s", entry_loc.path);
+ ret = -1;
+ continue;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+ &uuid_str);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid from dict for %s",
+ entry_loc.path);
+ ret = -1;
+ continue;
+ }
+
+ if (uuid_parse (uuid_str, node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+ "failed for %s", entry_loc.path);
+ continue;
+ }
+
+ /* if file belongs to different node, skip migration
+ * the other node will take responsibility of migration
+ */
+ if (uuid_compare (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_TRACE, "%s does not"
+ "belong to this node", entry_loc.path);
+ continue;
+ }
+
+ uuid_str = NULL;
+
+ dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+
+
+ /* if distribute is present, it will honor this key.
+ * -1, ENODATA is returned if distribute is not present
+ * or file doesn't have a link-file. If file has
+ * link-file, the path of link-file will be the value,
+ * and also that guarantees that file has to be mostly
+ * migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ if (-ret != ENODATA) {
+ loglevel = GF_LOG_ERROR;
+ defrag->total_failures += 1;
+ } else {
+ loglevel = GF_LOG_TRACE;
+ }
+ gf_log (this->name, loglevel, "%s: failed to "
+ "get "GF_XATTR_LINKINFO_KEY" key - %s",
+ entry_loc.path, strerror (-ret));
+ ret = -1;
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret) {
+ err = op_errno;
+ /* errno is overloaded. See
+ * rebalance_task_completion () */
+ if (err != ENOSPC) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data skipped for %s"
+ " due to space constraints",
+ entry_loc.path);
+ defrag->skipped +=1;
+ } else{
+ gf_log (this->name, GF_LOG_ERROR,
+ "migrate-data failed for %s",
+ entry_loc.path);
+ defrag->total_failures +=1;
+ }
+ }
+
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+ (end.tv_usec - start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration of "
+ "file:%s size:%"PRIu64" bytes took %.2f"
+ "secs", entry_loc.path, iatt.ia_size,
+ elapsed/1e6);
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ }
+
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+ (end.tv_usec - dir_start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+ "%.2f secs", loc->path, elapsed/1e6);
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+ if (ret)
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ ret = -1;
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
+ ret = -1;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ gettimeofday (&defrag->start_time, NULL);
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ ret = -1;
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+
+
+out:
+ LOCK (&defrag->lock);
+ {
+ status = dict_new ();
+ gf_defrag_status_get (defrag, status);
+ if (ctx->notify)
+ ctx->notify (GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref (status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag) {
+ GF_FREE (defrag);
+ conf->defrag = NULL;
+ }
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop (sync_frame->this);
+
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ struct timeval end = {0,};
+
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ gettimeofday (&end, NULL);
+
+ elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+ if (elapsed) {
+ ret = dict_set_double (dict, "run-time", elapsed);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set run-time");
+ }
+
+ ret = dict_set_uint64 (dict, "failures", failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set failure count");
+
+ ret = dict_set_uint64 (dict, "skipped", skipped);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set skipped file count");
+log:
+ switch (defrag->defrag_status) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
+ "secs", status, elapsed);
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+ "%"PRIu64, files, size, lookup, failures, skipped);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+ dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Received stop command on rebalance");
+ defrag->defrag_status = status;
+
+ if (output)
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 02b12887f..925538cc8 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
@@ -35,7 +26,8 @@ int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
@@ -84,7 +76,7 @@ unwind:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
}
return 0;
@@ -97,7 +89,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preoldparent,
struct iatt *postoldparent,
struct iatt *prenewparent,
- struct iatt *postnewparent)
+ struct iatt *postnewparent, dict_t *xdata)
{
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
@@ -147,7 +139,7 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_dir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
if (!--call_cnt)
break;
}
@@ -164,7 +156,7 @@ unwind:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
return 0;
}
@@ -185,19 +177,20 @@ dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_hashed_dir_cbk,
local->dst_hashed,
local->dst_hashed->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
return 0;
err:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int
dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -226,7 +219,7 @@ dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
dht_local_t *local = NULL;
int this_call_cnt = -1;
@@ -246,7 +239,7 @@ dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, dht_rename_readdir_cbk,
prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ local->fd, 4096, 0, NULL);
return 0;
@@ -302,22 +295,70 @@ dht_rename_dir (call_frame_t *frame, xlator_t *this)
STACK_WIND (frame, dht_rename_opendir_cbk,
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
+ &local->loc2, local->fd, NULL);
}
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
+#define DHT_MARK_FOP_INTERNAL(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+ " internal dict key for %s", local->loc.path); \
+ } \
+ }while (0)
+
+#define DHT_MARKER_DONT_ACCOUNT(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, \
+ "yes"); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+ " marker dont account key for %s", local->loc.path); \
+ } \
+ }while (0)
+
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+ return 0;
+}
int
dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -346,11 +387,7 @@ dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
WIPE (&local->postparent);
if (is_last_call (this_call_cnt)) {
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
}
out:
@@ -368,7 +405,7 @@ dht_rename_cleanup (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
-
+ dict_t *xattr = NULL;
local = frame->local;
this = frame->this;
@@ -392,24 +429,53 @@ dht_rename_cleanup (call_frame_t *frame)
if (!call_cnt)
goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
+
gf_log (this->name, GF_LOG_TRACE,
"unlinking linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
}
if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
gf_log (this->name, GF_LOG_TRACE,
"unlinking link %s => %s (%s)", local->loc.path,
local->loc2.path, src_cached->name);
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
+ if (uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
}
+ if (xattr)
+ dict_unref (xattr);
+
return 0;
nolinks:
@@ -422,7 +488,7 @@ nolinks:
DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preoldparent,
&local->postoldparent, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
return 0;
}
@@ -430,9 +496,10 @@ nolinks:
int
dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_frame_t *prev = NULL;
dht_local_t *local = NULL;
@@ -446,6 +513,10 @@ dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->this->name, strerror (op_errno));
}
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
DHT_STACK_DESTROY (frame);
return 0;
@@ -456,7 +527,8 @@ int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -467,6 +539,7 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *rename_subvol = NULL;
call_frame_t *link_frame = NULL;
dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
local = frame->local;
prev = cookie;
@@ -476,6 +549,8 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: rename on %s failed (%s)", local->loc.path,
@@ -505,15 +580,25 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
uuid_copy (link_local->gfid, local->loc.inode->gfid);
dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- src_cached, dst_hashed, &link_local->loc);
+ this, src_cached, dst_hashed,
+ &link_local->loc);
}
err:
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
+
/* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
* is called. since rename has already happened on rename_subvol,
@@ -538,24 +623,47 @@ err:
if (local->call_cnt == 0)
goto unwind;
+ DHT_MARK_FOP_INTERNAL (xattr);
+
if (src_cached != dst_hashed && src_cached != dst_cached) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
gf_log (this->name, GF_LOG_TRACE,
"deleting old src datafile %s @ %s",
local->loc.path, src_cached->name);
+ if (uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
STACK_WIND (frame, dht_rename_unlink_cbk,
src_cached, src_cached->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
}
if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
gf_log (this->name, GF_LOG_TRACE,
"deleting old src linkfile %s @ %s",
local->loc.path, src_hashed->name);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+
STACK_WIND (frame, dht_rename_unlink_cbk,
src_hashed, src_hashed->fops->unlink,
- &local->loc);
+ &local->loc, 0, xattr_new);
+
+ dict_unref (xattr_new);
+ xattr_new = NULL;
}
if (dst_cached
@@ -567,8 +675,10 @@ err:
STACK_WIND (frame, dht_rename_unlink_cbk,
dst_cached, dst_cached->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
unwind:
@@ -576,16 +686,16 @@ unwind:
WIPE (&local->postoldparent);
WIPE (&local->preparent);
WIPE (&local->postparent);
+ if (xattr)
+ dict_unref (xattr);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent);
+ dht_rename_done (frame, this);
return 0;
cleanup:
+ if (xattr)
+ dict_unref (xattr);
dht_rename_cleanup (frame);
return 0;
@@ -595,12 +705,13 @@ cleanup:
int
dht_do_rename (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
xlator_t *rename_subvol = NULL;
+ dict_t *dict = NULL;
local = frame->local;
@@ -615,13 +726,19 @@ dht_do_rename (call_frame_t *frame)
else
rename_subvol = dst_hashed;
+ if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
+ DHT_MARKER_DONT_ACCOUNT(dict);
+ }
+
gf_log (this->name, GF_LOG_TRACE,
"renaming %s => %s (%s)",
local->loc.path, local->loc2.path, rename_subvol->name);
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
STACK_WIND (frame, dht_rename_cbk,
rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, dict);
return 0;
}
@@ -631,7 +748,8 @@ int
dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -646,7 +764,11 @@ dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"link/file on %s failed (%s)",
prev->this->name, strerror (op_errno));
local->op_ret = -1;
- local->op_errno = op_errno;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ } else if (local->src_cached == prev->this) {
+ /* merge of attr returned only from linkfile creation */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
}
this_call_cnt = dht_frame_return (frame);
@@ -669,7 +791,8 @@ cleanup:
int
dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -711,6 +834,7 @@ dht_rename_create_links (call_frame_t *frame)
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
int call_cnt = 0;
+ dict_t *xattr = NULL;
local = frame->local;
@@ -721,18 +845,27 @@ dht_rename_create_links (call_frame_t *frame)
dst_hashed = local->dst_hashed;
dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL (xattr);
if (src_cached == dst_cached) {
+ dict_t *xattr_new = NULL;
+
if (dst_hashed == dst_cached)
goto nolinks;
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
gf_log (this->name, GF_LOG_TRACE,
"unlinking dst linkfile %s @ %s",
local->loc2.path, dst_hashed->name);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+
STACK_WIND (frame, dht_rename_unlink_links_cbk,
dst_hashed, dst_hashed->fops->unlink,
- &local->loc2);
+ &local->loc2, 0, xattr_new);
+
+ dict_unref (xattr_new);
return 0;
}
@@ -749,17 +882,28 @@ dht_rename_create_links (call_frame_t *frame)
"linkfile %s @ %s => %s",
local->loc.path, dst_hashed->name, src_cached->name);
memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_links_cbk,
+ dht_linkfile_create (frame, dht_rename_links_cbk, this,
src_cached, dst_hashed, &local->loc);
}
if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref (xattr, NULL);
+
gf_log (this->name, GF_LOG_TRACE,
"link %s => %s (%s)", local->loc.path,
local->loc2.path, src_cached->name);
+ if (uuid_compare (local->loc.pargfid,
+ local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
+
STACK_WIND (frame, dht_rename_links_cbk,
src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xattr_new);
+
+ dict_unref (xattr_new);
}
nolinks:
@@ -767,6 +911,8 @@ nolinks:
/* skip to next step */
dht_do_rename (frame);
}
+ if (xattr)
+ dict_unref (xattr);
return 0;
}
@@ -774,7 +920,7 @@ nolinks:
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_t *src_cached = NULL;
xlator_t *src_hashed = NULL;
@@ -856,7 +1002,8 @@ dht_rename (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 3342c35a9..0e6527544 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -26,7 +17,7 @@
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
-
+#include "glusterfs-acl.h"
#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
layout->list[i].start = srt; \
@@ -38,43 +29,40 @@
layout->list[i].xlator->name, path); \
} while (0)
+#define DHT_RESET_LAYOUT_RANGE(layout) do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++ ) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
-static inline uint32_t
-dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop,
- uint32_t chunk_size)
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
{
- uint32_t overlap = 0;
- uint32_t chunk_begin = 0;
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- chunk_begin = cnk_idx * chunk_size;
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- /* There is no chance of overlap */
- if ((chunk_begin > stop) ||
- ((chunk_begin + chunk_size) < start))
- goto out;
-
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) <= stop)) {
- overlap = ((chunk_begin + chunk_size) - start);
- goto out;
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
}
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - start);
- goto out;
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
}
- if ((chunk_begin < stop) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - chunk_begin);
- goto out;
- }
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
-out:
- return overlap;
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
}
+
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
@@ -82,7 +70,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
local = frame->local;
local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ local->op_errno, NULL);
return 0;
}
@@ -90,7 +78,7 @@ dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -129,18 +117,33 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
xlator_t *subvol = NULL;
dict_t *xattr = NULL;
int ret = 0;
xlator_t *this = NULL;
int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
-
- subvol = layout->list[i].xlator;
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
this = frame->this;
+ GF_VALIDATE_OR_GOTO ("", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, layout, err);
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
xattr = get_new_dict ();
if (!xattr) {
goto err;
@@ -154,8 +157,7 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
goto err;
}
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
+ ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"%s: (subvol %s) failed to set xattr dictionary",
@@ -170,10 +172,22 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
layout->type, subvol->name, loc->path);
dict_ref (xattr);
+ if (local->xattr) {
+ data = dict_get (local->xattr, QUOTA_LIMIT_KEY);
+ if (data) {
+ ret = dict_add (xattr, QUOTA_LIMIT_KEY, data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "set quota limit key on %s",loc->path);
+ }
+ }
+ }
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+ loc, xattr, 0, NULL);
dict_unref (xattr);
@@ -183,11 +197,10 @@ err:
if (xattr)
dict_destroy (xattr);
- if (disk_layout)
- GF_FREE (disk_layout);
+ GF_FREE (disk_layout);
dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
+ -1, ENOMEM, NULL);
return 0;
}
@@ -198,21 +211,42 @@ dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int i = 0;
int count = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
gf_log (this->name, GF_LOG_DEBUG,
"writing the new range for all subvolumes");
- local->call_cnt = count = layout->cnt;
+ local->call_cnt = count = conf->subvolume_cnt;
for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--count == 0)
- break;
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
}
+
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
@@ -223,9 +257,12 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
int missing_xattr = 0;
int i = 0;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err != -1 || !layout->list[i].stop) {
@@ -238,7 +275,14 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
}
missing_xattr++;
}
-
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ * the layouts and for setting quota key's if present */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ missing_xattr++;
+ }
+ }
gf_log (this->name, GF_LOG_TRACE,
"%d subvolumes missing xattr for %s",
missing_xattr, loc->path);
@@ -249,23 +293,36 @@ dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
}
local->call_cnt = missing_xattr;
-
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err != -1 || !layout->list[i].stop)
continue;
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
if (--missing_xattr == 0)
break;
}
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
return 0;
}
int
dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -306,6 +363,9 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
local->call_cnt = missing_attr;
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1) {
@@ -316,7 +376,7 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
}
}
@@ -327,7 +387,8 @@ int
dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
@@ -373,6 +434,46 @@ out:
return 0;
}
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+ data_t *acl_default = NULL;
+ data_t *acl_access = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ GF_ASSERT (xattr);
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+
+ if (!acl_default) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_DEFAULT xattr not present");
+ goto cont;
+ }
+ ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_DEFAULT xattr");
+cont:
+ acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+ if (!acl_access) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_ACCESS xattr not present");
+ goto out;
+ }
+ ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_ACCESS xattr");
+
+out:
+ return;
+}
int
dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
@@ -406,16 +507,19 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
if (ret)
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set gfid in dict", loc->path);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
dict = dict_ref (local->params);
}
+ /* Set acls */
+ if (local->xattr && dict)
+ dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
if (!dict)
gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfid's are same");
+ "dict is NULL, need to make sure gfids are same");
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == ENOENT || force) {
@@ -429,7 +533,7 @@ dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
loc,
st_mode_from_ia (local->stbuf.ia_prot,
local->stbuf.ia_type),
- dict);
+ 0, dict);
}
}
@@ -448,7 +552,7 @@ dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
uint32_t hashval = 0;
int ret = 0;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
+ ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
if (ret == 0) {
start = (hashval % layout->cnt);
}
@@ -471,7 +575,7 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
for (j = 0; j < conf->subvolume_cnt; j++) {
if (conf->decommissioned_bricks[j] &&
conf->decommissioned_bricks[j] == layout->list[i].xlator) {
- layout->list[i].err = -EINVAL;
+ layout->list[i].err = EINVAL;
break;
}
}
@@ -479,9 +583,33 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+
+ It is important to note that it is safe
+ to race with mkdir() as self-heal and
+ mkdir are idempotent operations. Both will
+ strive to set the directory and layouts to
+ the same final state.
+ */
count++;
+ if (!err)
+ layout->list[i].err = -1;
}
}
@@ -496,49 +624,126 @@ dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
}
}
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-availbale). Else return count (available up bricks) */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
return count;
}
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x,y) table[x*new->cnt+y]
+
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname (old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
+ }
+}
+
+
dht_layout_t *
dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- uint32_t chunk = 0;
- uint32_t start = 0;
- uint32_t stop = 0;
- uint32_t overlap = 0;
- uint32_t max_overlap = 0;
- uint32_t chunk_begin = 0;
- int count = 0;
- int cnt = 0;
int i = 0;
- int j = 0;
- int k = 0;
- int loop_cnt = 0;
- int start_subvol = 0;
- int *fix_array = NULL;
xlator_t *this = NULL;
dht_layout_t *new_layout = NULL;
dht_conf_t *priv = NULL;
dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
this = frame->this;
priv = this->private;
local = frame->local;
- count = cnt = dht_get_layout_count (this, layout, 0);
-
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char);
- if (!fix_array) {
- /* No fix, use the existing layout itself */
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
+ loc->path);
goto done;
}
@@ -546,98 +751,33 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
if (!new_layout)
goto done;
- for (i = 0; i < new_layout->cnt; i++) {
- /* TODO: fix this in layout_alloc() itself */
- new_layout->list[i].err = -ENOENT;
- if (i < layout->cnt)
- new_layout->list[i].xlator = layout->list[i].xlator;
- }
-
- /* Check if there are any overlap in layout, and give the proper fix */
- for (i = 0; i < layout->cnt; i++) {
- /* No need to fix if 'err' is not '-1' */
- if (layout->list[i].err != -1)
- continue;
-
- /* If already existing layout is having no range, skip it */
- start = layout->list[i].start;
- stop = layout->list[i].stop;
- if ((stop - start) == 0)
- continue;
-
- max_overlap = 0;
-
- /* 'j' is used as starting point of each chunk */
- for (j = 1; j <= count; j++) {
- /* if chunk is already used, don't use it again */
- for (k = 0; k < i; k++)
- if (j == fix_array[k])
- break;
- if (k < i)
- continue;
-
- overlap = dht_find_overlap (i, (j-1), start, stop, chunk);
- if (max_overlap < overlap) {
- max_overlap = overlap;
- fix_array[i] = j;
- }
- }
-
- /* If we have any overlap, then use that itself as new
- layout for the subvolume */
- if (fix_array[i]) {
- chunk_begin = chunk * (fix_array[i] - 1);
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin,
- chunk, cnt, loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (fix_array[i] == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
- }
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
}
- /* Now, look for layouts which are not having any overlaps
- and give it a fix */
- for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt;
- i++, loop_cnt++) {
- if (i == new_layout->cnt)
- i = 0;
-
- /* If 'fix_array[i]' is set, the layout is already fixed. */
- if (fix_array[i])
- continue;
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
- if (layout->list[i].err != -1) {
- new_layout->list[i].err = layout->list[i].err;
- continue;
- }
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
- for (k = 1; k <= count; k++) {
- for (j = 0; j < new_layout->cnt; j++) {
- if (k == fix_array[j])
- break;
- }
- /* Didn't find any of the list begining with 'k' */
- if (j == new_layout->cnt)
- break;
- }
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
- fix_array[i] = k;
- chunk_begin = (k - 1) * chunk;
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt,
- loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (k == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
- }
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
done:
if (new_layout) {
@@ -651,7 +791,7 @@ done:
local->layout = new_layout;
}
- return new_layout;
+ return local->layout;
}
@@ -675,9 +815,11 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE (layout);
for (i = start_subvol; i < layout->cnt; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -690,7 +832,7 @@ dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
for (i = 0; i < start_subvol; i++) {
err = layout->list[i].err;
- if (err == -1) {
+ if (err == -1 || err == ENOENT) {
DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
cnt, loc->path);
if (--cnt == 0) {
@@ -709,35 +851,17 @@ int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
+ uint32_t holes = 0;
int ret = -1;
int i = -1;
- int overlaps = -1;
+ uint32_t overlaps = 0;
- this = frame->this;
- conf = this->private;
local = frame->local;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
holes = local->selfheal.hole_cnt;
overlaps = local->selfheal.overlaps_cnt;
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
-
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
-
if (holes || overlaps) {
dht_selfheal_layout_new_directory (frame, loc, layout);
ret = 0;
@@ -789,6 +913,9 @@ dht_fix_directory_layout (call_frame_t *frame,
/* No layout sorting required here */
tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
return 0;
@@ -811,9 +938,8 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
dht_layout_anomalies (this, loc, layout,
&local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
down = local->selfheal.down;
misc = local->selfheal.misc;
@@ -822,14 +948,14 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.layout = dht_layout_ref (this, layout);
if (down) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes down -- not fixing", down);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"%d subvolumes have unrecoverable errors", misc);
ret = 0;
goto sorry_no_fix;
@@ -839,7 +965,7 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
ret = dht_selfheal_dir_getafix (frame, loc, layout);
if (ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_WARNING,
"not able to form layout for the directory");
goto sorry_no_fix;
}
@@ -872,3 +998,51 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
return ret;
}
+
+int
+dht_dir_attr_heal (void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+ call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || (subvol == dht_first_up_subvol (this)))
+ continue;
+ ret = syncop_setattr (subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret) {
+ gf_log ("dht", GF_LOG_ERROR, "Failed to set uid/gid on"
+ " %s on %s subvol (%s)", local->loc.path,
+ subvol->name, strerror (-ret));
+ }
+ }
+out:
+ return 0;
+}
+
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY (sync_frame);
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 000000000..36c073973
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,781 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "statedump.h"
+#include "dht-common.h"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+ if (!prefix)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR (layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ snprintf (key, sizeof (key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]){
+ snprintf (key, sizeof (key), "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ snprintf (key, sizeof (key), "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+
+ snprintf (key, sizeof (key), "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d",
+ (int)conf->subvolume_status[i]);
+ }
+
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+
+ if (conf->du_stats) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i])
+ continue;
+
+ snprintf (key, sizeof (key), "subvolumes[%d]", i);
+ gf_proc_dump_write (key, "%s",
+ conf->subvolumes[i]->name);
+
+ snprintf (key, sizeof (key),
+ "du_stats[%d].avail_percent", i);
+ gf_proc_dump_write (key, "%lf",
+ conf->du_stats[i].avail_percent);
+
+ snprintf (key, sizeof (key), "du_stats[%d].avail_space",
+ i);
+ gf_proc_dump_write (key, "%lu",
+ conf->du_stats[i].avail_space);
+
+ snprintf (key, sizeof (key),
+ "du_stats[%d].avail_inodes", i);
+ gf_proc_dump_write (key, "%lf",
+ conf->du_stats[i].avail_inodes);
+
+ snprintf (key, sizeof (key), "du_stats[%d].log", i);
+ gf_proc_dump_write (key, "%lu",
+ conf->du_stats[i].log);
+ }
+ }
+
+ if (conf->last_stat_fetch.tv_sec)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch.tv_sec));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup (bricks);
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_log (this->name, GF_LOG_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+ int ret = -1;
+
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+ regex_t *re, gf_boolean_t *re_valid)
+{
+ char *temp_str;
+
+ if (dict_get_str (odict, name, &temp_str) != 0) {
+ if (strcmp(name,"rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str,"none")) {
+ return;
+ }
+
+ if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ }
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "compiling regex %s failed", temp_str);
+ }
+}
+
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp (temp_str, "auto")) {
+ if (!gf_string2boolean (temp_str, &search_unhashed)) {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured (%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
+ " lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ //return -1;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ percent, out);
+
+ GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
+ if (conf->defrag) {
+ GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+ options, bool, out);
+ }
+
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ ret = dht_decommissioned_remove (this, conf);
+ if (ret == -1)
+ goto out;
+ }
+
+ dht_init_regex (this, options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r (data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup (pattern_str);
+ pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+ 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize(pattern, &pattern_list->size)
+ == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize (num, &pattern_list->size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+ memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (pattern_list);
+ GF_FREE (dup_str);
+
+ return ret;
+}
+
+int
+dht_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+ "specified");
+ goto err;
+ }
+
+ if (uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+ "glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+ }
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp (temp_str, "auto"))
+ gf_string2boolean (temp_str, &conf->search_unhashed);
+ else
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+
+ GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+ err);
+
+ GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
+
+ GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+ err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+ uint32, err);
+
+ GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+ bool, err);
+
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+ == 0) {
+ if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse"
+ " rebalance-filter (%s)", temp_str);
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex (this, this->options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, this->options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+ LOCK_INIT (&conf->layout_lock);
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new (dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR,
+ conf->xattr_name);
+ gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf->du_stats);
+
+ GF_FREE (conf->defrag);
+
+ GF_FREE (conf->xattr_name);
+ GF_FREE (conf->link_xattr_name);
+ GF_FREE (conf->wild_xattr_name);
+
+ GF_FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+ "on", "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes."
+ },
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description = "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ },
+ { .key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ },
+ { .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files."
+ },
+ { .key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."
+ },
+ { .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value."
+ },
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick."
+ },
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory."
+ },
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries."
+ },
+ { .key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed."
+ },
+ { .key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed."
+ },
+ { .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description = "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it."
+ },
+
+ /* NUFA option */
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* switch option */
+ { .key = {"pattern.switch.case"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 8be573f51..fc0ca2f77 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,434 +14,15 @@
#include "config.h"
#endif
-/* TODO: add NS locking */
-
#include "statedump.h"
#include "dht-common.h"
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-struct volume_options options[];
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", layout, out);
- GF_VALIDATE_OR_GOTO ("dht", prefix, out);
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-
-out:
- return;
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- sprintf (key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- sprintf (key, "file_layouts[%d]", i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- sprintf (key, "dir_layouts[%d]", i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
-
- sprintf (key, "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
- gf_proc_dump_write("gen", "%d", conf->gen);
- gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes);
- gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
- gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
- gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_write("du_stats.avail_percent", "%lf",
- conf->du_stats->avail_percent);
- gf_proc_dump_write("du_stats.avail_space", "%lu",
- conf->du_stats->avail_space);
- gf_proc_dump_write("du_stats.avail_inodes", "%lf",
- conf->du_stats->avail_inodes);
- gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
- }
- gf_proc_dump_write("last_stat_fetch", "%s", ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
-out:
- return ret;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
- dht_layout_dump(layout, "layout");
-
-out:
- return ret;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = dht_notify (this, event, data);
-
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-out:
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-
-int
-dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
- const char *bricks)
-{
- int i = 0;
- int ret = -1;
- char *tmpstr = NULL;
- char *dup_brick = NULL;
- char *node = NULL;
-
- if (!conf || !bricks)
- goto out;
-
- dup_brick = gf_strdup (bricks);
- node = strtok_r (dup_brick, ",", &tmpstr);
- while (node) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!strcmp (conf->subvolumes[i]->name, node)) {
- conf->decommissioned_bricks[i] =
- conf->subvolumes[i];
- gf_log (this->name, GF_LOG_INFO,
- "decommissioning subvolume %s",
- conf->subvolumes[i]->name);
- break;
- }
- }
- if (i == conf->subvolume_cnt) {
- /* Wrong node given. */
- goto out;
- }
- node = strtok_r (NULL, ",", &tmpstr);
- }
-
- ret = 0;
-out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured (%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
- " lookup-unhashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- //return -1;
- ret = -1;
- goto out;
- }
- } else {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- }
-
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
- percent_or_size, out);
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
- percent, out);
- GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
- options, uint32, out);
-
- if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
- err);
-
- GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
-
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
-
- GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
- err);
-
- conf->dir_spread_cnt = conf->subvolume_cnt;
- GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
- uint32, err);
-
- GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
- bool, err);
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
-
+class_methods_t class_methods = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
.lookup = dht_lookup,
@@ -478,6 +50,7 @@ struct xlator_fops fops = {
.access = dht_access,
.readlink = dht_readlink,
.getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
.readv = dht_readv,
.flush = dht_flush,
.fsync = dht_fsync,
@@ -486,6 +59,7 @@ struct xlator_fops fops = {
.lk = dht_lk,
/* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
.removexattr = dht_removexattr,
.setxattr = dht_setxattr,
.fsetxattr = dht_fsetxattr,
@@ -496,6 +70,9 @@ struct xlator_fops fops = {
.fxattrop = dht_fxattrop,
.setattr = dht_setattr,
.fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
@@ -509,44 +86,4 @@ struct xlator_cbks cbks = {
// .releasedir = dht_releasedir,
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "on",
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space that must be "
- "kept free."
- },
- { .key = {"min-free-inodes"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "5%",
- .description = "Percentage inodes that must be "
- "kept free."
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"decommissioned-bricks"},
- .type = GF_OPTION_TYPE_ANY,
- },
- { .key = {NULL} },
-};
+;
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 2f196951a..e934acdf0 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -27,6 +18,8 @@
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options options[];
+
int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
@@ -44,7 +37,6 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_cnt = 0;
int ret = 0;
-
conf = this->private;
prev = cookie;
@@ -62,7 +54,8 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -141,7 +134,7 @@ out:
err:
DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
+ inode, stbuf, xattr, postparent);
return 0;
}
@@ -211,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
* revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -232,7 +225,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -241,7 +234,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
}
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict value.");
@@ -260,7 +253,8 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -269,7 +263,7 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -280,21 +274,21 @@ nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
nufa_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -329,7 +323,8 @@ nufa_create (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (subvol != avail_subvol) {
@@ -337,11 +332,10 @@ nufa_create (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -350,14 +344,14 @@ nufa_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -366,34 +360,39 @@ int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
WIPE (postparent);
WIPE (preparent);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -429,7 +428,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
avail_subvol =
dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
+ (xlator_t *)conf->private,
+ local);
}
if (avail_subvol != subvol) {
@@ -437,10 +437,11 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
return 0;
}
@@ -448,211 +449,185 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
+gf_boolean_t
+same_first_part (char *str1, char term1, char *str2, char term2)
{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
+ }
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
}
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- conf = this->private;
+static void
+nufa_find_local_brick (xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
+ if (!addr_match)
+ return;
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) &&
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
+ }
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
+}
- GF_FREE (conf);
- }
+static void
+nufa_to_dht (xlator_t *this)
+{
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- return;
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
int
-init (xlator_t *this)
+nufa_find_local_subvol (xlator_t *this,
+ void (*fn) (xlator_t *each, void* data), void *data)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
+ "brick");
return -1;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) {
+ gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
+ }
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+ candidate = parent;
+ trav = parent->parents;
}
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ return ret;
+}
- conf->gen = 1;
+int
+nufa_init (xlator_t *this)
+{
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {0, };
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
+ if ((data = dict_get (this->options, "local-volume-name"))) {
local_volname = data->data;
- }
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
+ else
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
}
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
}
-
- this->private = conf;
-
return 0;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
- return -1;
-}
+class_methods_t class_methods = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
@@ -699,19 +674,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index fd3f22ea0..2717ce975 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -31,6 +22,8 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options options[];
+
struct switch_sched_array {
xlator_t *xl;
int32_t eligible;
@@ -76,29 +69,37 @@ get_switch_matching_subvol (const char *path, dht_conf_t *conf,
struct switch_struct *cond = NULL;
struct switch_struct *trav = NULL;
char *pathname = NULL;
- int idx = 0;
+ int idx = 0;
+ xlator_t *subvol = NULL;
cond = conf->private;
+ subvol = hashed_subvol;
if (!cond)
- return hashed_subvol;
+ goto out;
- trav = cond;
pathname = gf_strdup (path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
while (trav) {
if (fnmatch (trav->path_pattern,
pathname, FNM_NOESCAPE) == 0) {
for (idx = 0; idx < trav->num_child; idx++) {
if (trav->array[idx].xl == hashed_subvol)
- return hashed_subvol;
+ goto out;
}
idx = trav->node_index++;
trav->node_index %= trav->num_child;
- return trav->array[idx].xl;
+ subvol = trav->array[idx].xl;
+ goto out;
}
trav = trav->next;
}
+out:
GF_FREE (pathname);
- return hashed_subvol;
+
+ return subvol;
}
@@ -136,7 +137,8 @@ switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
@@ -290,11 +292,11 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
* attribute, revalidates directly go to the cached-subvolume.
*/
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
for (i = 0; i < layout->cnt; i++) {
subvol = layout->list[i].xlator;
@@ -309,18 +311,18 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
} else {
do_fresh_lookup:
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ conf->xattr_name, 4 * 4);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
+ "failed to set dict value for %s",
+ conf->xattr_name);
ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ conf->link_xattr_name, 256);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht.linkto");
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
if (!hashed_subvol) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -366,7 +368,8 @@ switch_lookup (call_frame_t *frame, xlator_t *this,
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -375,7 +378,7 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
@@ -386,21 +389,21 @@ switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
STACK_WIND (frame, dht_create_cbk,
local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd,
- local->params);
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
int
switch_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -434,18 +437,18 @@ switch_create (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- switch_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
@@ -454,14 +457,14 @@ switch_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -470,31 +473,36 @@ int
switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
local->cached_subvol->fops->mknod,
&local->loc, local->mode, local->rdev,
- local->params);
+ local->umask, local->params);
return 0;
}
-
+err:
DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent);
+ inode, stbuf, preparent, postparent, xdata);
return 0;
}
int
-switch_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *params)
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
@@ -529,7 +537,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
if (dht_is_subvol_filled (this, avail_subvol)) {
avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
}
if (avail_subvol != subvol) {
@@ -537,46 +546,36 @@ switch_mknod (call_frame_t *frame, xlator_t *this,
local->params = dict_ref (params);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
local->cached_subvol = avail_subvol;
dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
return 0;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, params);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
void
-fini (xlator_t *this)
+switch_fini (xlator_t *this)
{
- int i = 0;
dht_conf_t *conf = NULL;
struct switch_struct *trav = NULL;
struct switch_struct *prev = NULL;
@@ -587,30 +586,14 @@ fini (xlator_t *this)
trav = (struct switch_struct *)conf->private;
conf->private = NULL;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
prev = trav;
trav = trav->next;
GF_FREE (prev);
}
-
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
}
- return;
+ dht_fini(this);
}
int
@@ -670,8 +653,10 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
dup_str = gf_strdup (switch_str);
switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
gf_switch_mt_switch_struct);
- if (!switch_opt)
+ if (!switch_opt) {
+ GF_FREE (dup_str);
goto err;
+ }
pattern = strtok_r (dup_str, ":", &tmp_str1);
childs = strtok_r (NULL, ":", &tmp_str1);
@@ -681,9 +666,11 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
"for all the unconfigured child nodes,"
" hence neglecting current option");
switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
GF_FREE (dup_str);
continue;
}
+ GF_FREE (dup_str);
memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
if (childs) {
dup_childs = gf_strdup (childs);
@@ -740,7 +727,6 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
"option in unify volume. Exiting");
goto err;
}
- GF_FREE (dup_str);
/* Link it to the main structure */
if (switch_buf) {
@@ -753,6 +739,7 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
switch_str = strtok_r (NULL, ";", &tmp_str);
}
@@ -809,19 +796,20 @@ set_switch_pattern (xlator_t *this, dht_conf_t *conf,
/* First entry */
switch_buf = switch_opt;
}
+ switch_opt = NULL;
}
/* */
conf->private = switch_buf;
return 0;
err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
if (switch_buf) {
- if (switch_buf_array)
- GF_FREE (switch_buf_array);
trav = switch_buf;
while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
+ GF_FREE (trav->array);
switch_opt = trav;
trav = trav->next;
GF_FREE (switch_opt);
@@ -831,68 +819,18 @@ err:
}
-int
-init (xlator_t *this)
+int32_t
+switch_init (xlator_t *this)
{
dht_conf_t *conf = NULL;
data_t *data = NULL;
- char *temp_str = NULL;
int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "SWITCH needs more than one subvolume");
- return -1;
- }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_switch_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- conf->unhashed_sticky_bit = 0;
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
}
+ conf = this->private;
data = dict_get (this->options, "pattern.switch.case");
if (data) {
@@ -903,65 +841,23 @@ init (xlator_t *this)
}
}
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_switch_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- /* Create 'syncop' environment */
- conf->env = syncenv_new (0);
- if (!conf->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create sync environment %s",
- strerror (errno));
- goto err;
- }
-
this->private = conf;
-
return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
+ dht_fini(this);
return -1;
}
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
+
+
struct xlator_fops fops = {
.lookup = switch_lookup,
.create = switch_create,
@@ -1006,19 +902,3 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.forget = dht_forget
};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
new file mode 100644
index 000000000..aa19ddc57
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -0,0 +1,63 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "byte-order.h"
+
+int
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
+{
+ return 0;
+}
+
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ return 0;
+}
+
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
+{
+ return 0;
+}
+
+int
+dict_get_ptr (dict_t *this, char *key, void **ptr)
+{
+ return 0;
+}
+
+int
+dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+{
+ return 0;
+}
+
+int _gf_log (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+{
+ return 0;
+}
+
+int _gf_log_callingfn (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+{
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
new file mode 100644
index 000000000..b5233d235
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -0,0 +1,124 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-common.h"
+#include "logging.h"
+#include "xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+#include <cmockery/pbc.h>
+#include <cmockery/cmockery.h>
+
+/*
+ * Helper functions
+ */
+
+static xlator_t *
+helper_xlator_init(uint32_t num_types)
+{
+ xlator_t *xl;
+ int i, ret;
+
+ REQUIRE(num_types > 0);
+
+ xl = test_calloc(1, sizeof(xlator_t));
+ assert_non_null(xl);
+ xl->mem_acct.num_types = num_types;
+ xl->mem_acct.rec = test_calloc(num_types, sizeof(struct mem_acct_rec));
+ assert_non_null(xl->mem_acct.rec);
+
+ xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
+ assert_non_null(xl->ctx);
+
+ for (i = 0; i < num_types; i++) {
+ ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
+ assert_false(ret);
+ }
+
+ ENSURE(num_types == xl->mem_acct.num_types);
+ ENSURE(NULL != xl);
+
+ return xl;
+}
+
+static int
+helper_xlator_destroy(xlator_t *xl)
+{
+ int i, ret;
+
+ for (i = 0; i < xl->mem_acct.num_types; i++) {
+ ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ free(xl->mem_acct.rec);
+ free(xl->ctx);
+ free(xl);
+ return 0;
+}
+
+/*
+ * Unit tests
+ */
+static void
+test_dht_layout_new(void **state)
+{
+ xlator_t *xl;
+ dht_layout_t *layout;
+ dht_conf_t *conf;
+ int cnt;
+
+ expect_assert_failure(dht_layout_new(NULL, 0));
+ expect_assert_failure(dht_layout_new((xlator_t *)0x12345, -1));
+ xl = helper_xlator_init(10);
+
+ // xl->private is NULL
+ assert_null(xl->private);
+ cnt = 100;
+ layout = dht_layout_new(xl, cnt);
+ assert_non_null(layout);
+ assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
+ assert_int_equal(layout->cnt, cnt);
+ assert_int_equal(layout->ref, 1);
+ assert_int_equal(layout->gen, 0);
+ assert_int_equal(layout->spread_cnt, 0);
+ free(layout);
+
+ // xl->private is not NULL
+ cnt = 110;
+ conf = (dht_conf_t *)test_calloc(1, sizeof(dht_conf_t));
+ assert_non_null(conf);
+ conf->dir_spread_cnt = 12345;
+ conf->gen = -123;
+ xl->private = conf;
+
+ layout = dht_layout_new(xl, cnt);
+ assert_non_null(layout);
+ assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
+ assert_int_equal(layout->cnt, cnt);
+ assert_int_equal(layout->ref, 1);
+ assert_int_equal(layout->gen, conf->gen);
+ assert_int_equal(layout->spread_cnt, conf->dir_spread_cnt);
+ free(layout);
+
+ free(conf);
+ helper_xlator_destroy(xl);
+}
+
+int main(void) {
+ const UnitTest tests[] = {
+ unit_test(test_dht_layout_new),
+ };
+
+ return run_tests(tests, "xlator_dht_layout");
+}
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a2965..5c1364b7f 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module -avoid-version
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index 1e4af1b62..19be1ed27 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
index 9bfb3972b..e5e97d237 100644
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __HA_MEM_TYPES_H__
#define __HA_MEM_TYPES_H__
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index 38d4229d3..3eccb516b 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
@@ -1876,13 +1866,9 @@ err:
}
if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- }
+ GF_FREE (hafdp->fdstate);
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- }
+ GF_FREE (hafdp->path);
GF_FREE (hafdp);
}
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index 39b6851e7..e2ed7eaa6 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a..a278b05e2 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module -avoid-version
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index 81212fcfd..851397b68 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
index 669b93dc2..3e89f4736 100644
--- a/xlators/cluster/map/src/map-mem-types.h
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_MEM_TYPES_H__
#define __MAP_MEM_TYPES_H__
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index ead9da0b9..6150a33ce 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -2375,8 +2365,7 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- GF_FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index bccac437c..7703a543e 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 0db3c9eeb..2d151422a 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -2,16 +2,19 @@
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c $(top_builddir)/xlators/lib/src/libxlator.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/lib/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 000000000..a047d4a2e
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,677 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ if (local->xdata)
+ dict_unref (local->xdata);
+
+out:
+ return;
+}
+
+
+
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "memory allocation failed");
+ goto out;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "stripe aggregate dict set failed");
+ GF_FREE (size);
+ goto out;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+ goto out;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+ } else if (strcmp (key, GF_CONTENT_KEY)) {
+ /* No need to aggregate 'CONTENT' data */
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+out:
+ return 0;
+}
+
+
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, stripe_aggregate, dst);
+out:
+ return;
+}
+
+
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *sbuf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!buffer || !local || !local->xattr_list)
+ goto out;
+
+ sbuf = buffer;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ memcpy (buffer, xattr->xattr_value, len);
+ buffer += len;
+ *buffer++ = ' ';
+ }
+ }
+
+ *--buffer = '\0';
+ if (total)
+ *total = buffer - sbuf;
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!local || !local->xattr_list)
+ goto out;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+
+ if (xattr && xattr->xattr_value)
+ GF_FREE (xattr->xattr_value);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz)
+{
+ int32_t ret = -1, i = 0, len = 0;
+ dict_t *tmp1 = NULL, *tmp2 = NULL;
+ char *buf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (xattr_serz == NULL) {
+ goto out;
+ }
+
+ tmp2 = dict_new ();
+
+ if (tmp2 == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ ret = dict_reset (tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_reset failed (%s)",
+ strerror (-ret));
+ }
+
+ ret = dict_unserialize (xattr->xattr_value,
+ xattr->xattr_len,
+ &tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_unserialize failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ tmp1 = dict_copy (tmp2, tmp1);
+ if (tmp1 == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_copy failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ len = dict_serialized_length (tmp1);
+ if (len > 0) {
+ buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+ if (buf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_serialize (tmp1, buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s)", strerror (-ret));
+ GF_FREE(buf);
+ ret = -1;
+ goto out;
+ }
+
+ *xattr_serz = buf;
+ }
+
+ ret = 0;
+out:
+ if (tmp1 != NULL) {
+ dict_unref (tmp1);
+ }
+
+ if (tmp2 != NULL) {
+ dict_unref (tmp2);
+ }
+
+ return ret;
+}
+
+
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz)
+{
+ int ret = -1;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+ char stripe_size_str[20] = {0,};
+ char *pathinfo_serz = NULL;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ goto out;
+ }
+
+ (void) snprintf (stripe_size_str, 20, "%ld",
+ (local->fctx) ? local->fctx->stripe_size : 0);
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+ + strlen (stripe_size_str) + 7;
+ local->xattr_total_len += (padding + 2);
+
+ pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+ gf_common_mt_char);
+ if (!pathinfo_serz)
+ goto out;
+
+ /* xlator info */
+ (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+ this->name, stripe_size_str);
+
+ ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot aggregate pathinfo list");
+ GF_FREE(pathinfo_serz);
+ goto out;
+ }
+
+ *(pathinfo_serz + padding + tlen) = ')';
+ *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+ *xattr_serz = pathinfo_serz;
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+ struct stripe_options *trav = NULL;
+ uint64_t block_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+ GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+ LOCK (&priv->lock);
+ {
+ block_size = priv->block_size;
+ trav = priv->pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) {
+ block_size = trav->block_size;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ return block_size;
+}
+
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+ dict_t *dict)
+{
+ char key[256] = {0,};
+ data_t *data = NULL;
+ int32_t index = 0;
+ stripe_private_t *priv = NULL;
+
+ priv = this->private;
+
+
+ if (!local->fctx) {
+ local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+ gf_stripe_mt_stripe_fd_ctx_t);
+ if (!local->fctx) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
+ local->fctx->static_array = 0;
+ }
+ /* Stripe block size */
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-size");
+ goto out;
+ } else {
+ if (!local->fctx->stripe_size) {
+ local->fctx->stripe_size =
+ data_to_int64 (data);
+ }
+
+ if (local->fctx->stripe_size != data_to_int64 (data)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stripe-size mismatch in blocks");
+ local->xattr_self_heal_needed = 1;
+ }
+ }
+
+ /* Stripe count */
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ data = dict_get (dict, key);
+
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-count");
+ goto out;
+ }
+ if (!local->fctx->xl_array) {
+ local->fctx->stripe_count = data_to_int32 (data);
+ if (!local->fctx->stripe_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+ sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
+
+ if (!local->fctx->xl_array) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+ if (local->fctx->stripe_count != data_to_int32 (data)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr (%d != %d)",
+ local->fctx->stripe_count, data_to_int32 (data));
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* index */
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-index");
+ goto out;
+ }
+ index = data_to_int32 (data);
+ if (index > priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-index xattr (%d)", index);
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+ if (local->fctx->xl_array) {
+ if (!local->fctx->xl_array[index])
+ local->fctx->xl_array[index] = prev->this;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ data = dict_get(dict, key);
+ if (!data) {
+ /*
+ * The file was probably created prior to coalesce support.
+ * Assume non-coalesce mode for this file to maintain backwards
+ * compatibility.
+ */
+ gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+ "attr, assume non-coalesce mode");
+ local->fctx->stripe_coalesce = 0;
+ } else {
+ local->fctx->stripe_coalesce = data_to_int32(data);
+ }
+
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+ uint32_t stripe_count, uint32_t stripe_index,
+ uint32_t stripe_coalesce)
+{
+ char key[256] = {0,};
+ int32_t ret = -1;
+
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ ret = dict_set_int64 (dict, key, stripe_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ ret = dict_set_int32 (dict, key, stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ ret = dict_set_int32 (dict, key, stripe_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ ret = dict_set_int32(dict, key, stripe_coalesce);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req_dict", key);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+
+ if (gf_string2bytesize (num, &priv->block_size) != 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+
+}
+
+
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *num = NULL;
+ struct stripe_options *temp_stripeopt = NULL;
+ struct stripe_options *stripe_opt = NULL;
+
+ if (!this || !priv || !data)
+ goto out;
+
+ /* Get the pattern for striping.
+ "option block-size *avi:10MB" etc */
+ stripe_str = strtok_r (data, ",", &tmp_str);
+ while (stripe_str) {
+ dup_str = gf_strdup (stripe_str);
+ stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+ gf_stripe_mt_stripe_options);
+ if (!stripe_opt) {
+ goto out;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!num) {
+ num = pattern;
+ pattern = "*";
+ ret = set_default_block_size (priv, num);
+ if (ret)
+ goto out;
+ }
+ if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+ "%s. Should be atleast %llu bytes", num,
+ STRIPE_MIN_BLOCK_SIZE);
+ goto out;
+ }
+ if (stripe_opt->block_size % 512) {
+ gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+ " be a multiple of 512 bytes", num);
+ goto out;
+ }
+
+ memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "block-size : pattern %s : size %"PRId64,
+ stripe_opt->path_pattern, stripe_opt->block_size);
+
+ if (priv->pattern)
+ temp_stripeopt = NULL;
+ else
+ temp_stripeopt = priv->pattern;
+
+ stripe_opt->next = temp_stripeopt;
+
+ priv->pattern = stripe_opt;
+ stripe_opt = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+
+ GF_FREE (dup_str);
+
+ GF_FREE (stripe_opt);
+
+ return ret;
+}
+
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+ if (to->ia_size < from->ia_size)
+ to->ia_size = from->ia_size;
+ if (to->ia_mtime < from->ia_mtime)
+ to->ia_mtime = from->ia_mtime;
+ if (to->ia_ctime < from->ia_ctime)
+ to->ia_ctime = from->ia_ctime;
+ if (to->ia_atime < from->ia_atime)
+ to->ia_atime = from->ia_atime;
+ return 0;
+}
+
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+ size_t line_size = 0;
+ uint64_t stripe_num = 0;
+ off_t coalesced_offset = 0;
+
+ line_size = stripe_size * stripe_count;
+ stripe_num = offset / line_size;
+
+ coalesced_offset = (stripe_num * stripe_size) +
+ (offset % stripe_size);
+
+ return coalesced_offset;
+}
+
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index)
+{
+ uint64_t nr_full_stripe_chunks = 0, mod = 0;
+
+ if (!size)
+ return size;
+
+ /*
+ * Estimate the number of fully written stripes from the
+ * local file size. Each stripe_size chunk corresponds to
+ * a stripe.
+ */
+ nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
+ mod = size % stripe_size;
+
+ if (!mod) {
+ /*
+ * There is no remainder, thus we could have overestimated
+ * the size of the file in terms of chunks. Trim the number
+ * of chunks by the following stripe members and leave it
+ * up to those nodes to respond with a larger size (if
+ * necessary).
+ */
+ nr_full_stripe_chunks -= stripe_count -
+ (stripe_index + 1);
+ size = nr_full_stripe_chunks * stripe_size;
+ } else {
+ /*
+ * There is a remainder and thus we own the last chunk of the
+ * file. Add the preceding stripe members of the final stripe
+ * along with the remainder to calculate the exact size.
+ */
+ nr_full_stripe_chunks += stripe_index;
+ size = nr_full_stripe_chunks * stripe_size + mod;
+ }
+
+ return size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
index 29c95c257..e9ac9cf46 100644
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -1,21 +1,11 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -25,12 +15,12 @@
#include "mem-types.h"
enum gf_stripe_mem_types_ {
- gf_stripe_mt_stripe_local_t = gf_common_mt_end + 1,
- gf_stripe_mt_iovec,
- gf_stripe_mt_readv_replies,
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
gf_stripe_mt_stripe_fd_ctx_t,
gf_stripe_mt_char,
gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
gf_stripe_mt_xlator_t,
gf_stripe_mt_stripe_private_t,
gf_stripe_mt_stripe_options,
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index af60102c1..79e80b513 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
@@ -32,6 +23,7 @@
* very much necessary, or else, use it in combination with AFR, to have a
* backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
#include "libxlator.h"
@@ -40,73 +32,10 @@
struct volume_options options[];
-void
-stripe_local_wipe (stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->inode)
- inode_unref (local->inode);
-
- if (local->xattr)
- dict_unref (local->xattr);
-
- if (local->dict)
- dict_unref (local->dict);
-
-out:
- return;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
-
- if (!path || !opts)
- goto out;
-
- /* FIXME: is a strdup really necessary? */
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = opts;
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
-
- GF_FREE (pathname);
-
-out:
- return block_size;
-}
-
-
-
int32_t
stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int callcnt = -1;
stripe_local_t *local = NULL;
@@ -135,7 +64,7 @@ int32_t
stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -150,7 +79,7 @@ stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
prev->this->fops->setattr, &local->loc,
- &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID));
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
out:
return 0;
@@ -164,7 +93,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
stripe_private_t *priv = NULL;
- dict_t *dict = NULL;
+ dict_t *xdata = NULL;
int ret = 0;
if (!local || !this || !frame) {
@@ -182,8 +111,7 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
if (!rframe) {
goto out;
}
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
goto out;
}
@@ -192,11 +120,11 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
loc_copy (&rlocal->loc, &local->loc);
memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
- dict = dict_new ();
- if (!dict)
+ xdata = dict_new ();
+ if (!xdata)
goto out;
- ret = dict_set_static_bin (dict, "gfid-req", local->stbuf.ia_gfid, 16);
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
if (ret)
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to set gfid-req", local->loc.path);
@@ -207,101 +135,43 @@ stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
trav->xlator, trav->xlator->fops->mknod,
&local->loc,
st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type), 0,
- dict);
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
if (IA_ISDIR (local->stbuf.ia_type)) {
STACK_WIND (rframe, stripe_sh_make_entry_cbk,
trav->xlator, trav->xlator->fops->mkdir,
- &local->loc, st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- dict);
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
}
trav = trav->next;
}
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
out:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- if (dict)
- dict_unref (dict);
+ if (xdata)
+ dict_unref (xdata);
return 0;
}
-void
-stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("stripe", GF_LOG_WARNING,
- "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE (size);
- goto out;
- }
- }
-
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
- } else if (strcmp (key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return;
-}
-
-
-void
-stripe_aggregate_xattr (dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach (src, stripe_aggregate, dst);
-out:
- return;
-}
-
-
int32_t
stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -332,30 +202,42 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
if (FIRST_CHILD(this) == prev->this) {
local->stbuf = *buf;
local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->dict = dict_ref (dict);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
if (local->xattr) {
- stripe_aggregate_xattr (local->dict,
+ stripe_aggregate_xattr (local->xdata,
local->xattr);
dict_unref (local->xattr);
local->xattr = NULL;
}
}
- if (!local->dict && !local->xattr) {
- local->xattr = dict_ref (dict);
- } else if (local->dict) {
- stripe_aggregate_xattr (local->dict, dict);
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
} else if (local->xattr) {
- stripe_aggregate_xattr (local->xattr, dict);
+ stripe_aggregate_xattr (local->xattr, xdata);
}
local->stbuf_blocks += buf->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->postparent_size < postparent->ia_size)
@@ -387,11 +269,13 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->postparent.ia_blocks = local->postparent_blocks;
local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
}
STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno, local->inode,
- &local->stbuf, local->dict,
+ &local->stbuf, local->xdata,
&local->postparent);
}
out:
@@ -400,14 +284,15 @@ out:
int32_t
stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
int32_t op_errno = EINVAL;
int64_t filesize = 0;
- int ret = 0;
+ int ret = 0;
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -419,8 +304,7 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -429,10 +313,37 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
frame->local = local;
loc_copy (&local->loc, loc);
- if (xattr_req && dict_get (xattr_req, GF_CONTENT_KEY)) {
- ret = dict_get_int64 (xattr_req, GF_CONTENT_KEY, &filesize);
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
+
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
if (!ret && (filesize > priv->block_size))
- dict_del (xattr_req, GF_CONTENT_KEY);
+ dict_del (xdata, GF_CONTENT_KEY);
+ }
+
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
+ if (ret)
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
+
}
/* Everytime in stripe lookup, all child nodes
@@ -440,11 +351,12 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
+ trav->xlator->fops->lookup, loc, xdata);
trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
err:
STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
@@ -454,7 +366,7 @@ err:
int32_t
stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -489,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -505,18 +420,19 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
return 0;
}
int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -534,8 +450,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -544,23 +459,30 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc);
+ trav->xlator->fops->stat, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
@@ -598,14 +520,14 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ local->op_errno, &local->statvfs_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -620,8 +542,7 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -633,13 +554,13 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -648,7 +569,7 @@ err:
int32_t
stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -686,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -708,19 +632,21 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -729,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -737,8 +662,7 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -747,15 +671,55 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- trav = trav->next;
- }
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -763,7 +727,7 @@ err:
int32_t
stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -802,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += preop->ia_blocks;
local->postbuf_blocks += postop->ia_blocks;
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
if (local->prebuf_size < preop->ia_size)
local->prebuf_size = preop->ia_size;
if (local->postbuf_size < postop->ia_size)
@@ -823,7 +790,7 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -832,11 +799,12 @@ out:
int32_t
stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -854,8 +822,7 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -867,28 +834,35 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_count = 1;
STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
return 0;
}
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk,
trav->xlator, trav->xlator->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -904,8 +878,7 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -916,13 +889,13 @@ stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid);
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -930,7 +903,8 @@ int32_t
stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -967,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->pre_buf.ia_blocks += prenewparent->ia_blocks;
local->post_buf.ia_blocks += postnewparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf.ia_size < buf->ia_size)
local->stbuf.ia_size = buf->ia_size;
@@ -992,7 +968,7 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
&local->stbuf, &local->preparent,
&local->postparent, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
@@ -1002,7 +978,8 @@ int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -1033,24 +1010,25 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
unwind:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
int32_t
stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -1070,33 +1048,40 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ frame->local = local;
+
local->op_ret = -1;
loc_copy (&local->loc, oldloc);
loc_copy (&local->loc2, newloc);
local->call_count = priv->child_count;
- frame->local = local;
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
err:
STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1121,10 +1106,10 @@ stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent_blocks += postparent->ia_blocks;
STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
return 0;
out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1135,7 +1120,7 @@ out:
int32_t
stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1171,17 +1156,19 @@ stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->unlink, &local->loc);
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
}
return 0;
out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1209,14 +1196,18 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
frame->local = local;
local->call_count = priv->child_count;
trav = trav->next; /* Skip the first child */
@@ -1224,13 +1215,13 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
while (trav) {
STACK_WIND (frame, stripe_unlink_cbk,
trav->xlator, trav->xlator->fops->unlink,
- loc);
+ loc, xflag, xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1238,8 +1229,7 @@ err:
int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
@@ -1266,10 +1256,10 @@ stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent_blocks += postparent->ia_blocks;
STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1277,7 +1267,7 @@ err:
int32_t
stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1309,16 +1299,16 @@ stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->rmdir, &local->loc,
- local->flags);
+ local->flags, NULL);
}
return 0;
out:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -1341,8 +1331,7 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -1356,13 +1345,13 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
while (trav) {
STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags);
+ trav->xlator->fops->rmdir, loc, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1371,7 +1360,7 @@ int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1392,7 +1381,7 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1404,7 +1393,7 @@ out:
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1443,7 +1432,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1451,7 +1440,7 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1461,7 +1450,7 @@ int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1499,10 +1488,16 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (uuid_is_null (local->ia_gfid))
uuid_copy (local->ia_gfid, buf->ia_gfid);
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1527,7 +1522,7 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
@@ -1541,13 +1536,13 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent.ia_size = local->postparent_size;
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- }
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
- /* Create itself has failed.. so return
- without setxattring */
+ }
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -1558,16 +1553,13 @@ int32_t
stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
int i = 1;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1607,10 +1599,6 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
-
trav = trav->next;
while (trav) {
if (priv->xattr_supported) {
@@ -1623,26 +1611,21 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_copy (local->xattr, dict);
- ret = dict_set_int64 (dict, size_key, local->stripe_size);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count, i,
+ priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", local->loc.path);
- ret = dict_set_int32 (dict, count_key, priv->child_count);
+ "Failed to build xattr request");
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", local->loc.path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", local->loc.path);
} else {
dict = local->xattr;
}
STACK_WIND (frame, stripe_mknod_ifreg_cbk,
trav->xlator, trav->xlator->fops->mknod,
- &local->loc, local->mode, local->rdev, dict);
+ &local->loc, local->mode, local->rdev, 0, dict);
trav = trav->next;
i++;
@@ -1654,7 +1637,7 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
- STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1663,25 +1646,22 @@ int32_t
stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
int32_t op_errno = EINVAL;
int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1711,37 +1691,26 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
- local->xattr = dict_copy_with_ref (params, NULL);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
local->mode = mode;
+ local->umask = umask;
local->rdev = rdev;
/* Everytime in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
if (priv->xattr_supported) {
dict = dict_new ();
if (!dict) {
@@ -1750,29 +1719,22 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
- dict = params;
+ dict = xdata;
}
STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
- loc, mode, rdev, dict);
+ loc, mode, rdev, umask, dict);
if (dict && need_unref)
dict_unref (dict);
@@ -1781,11 +1743,11 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
+ loc, mode, rdev, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1794,7 +1756,7 @@ int32_t
stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -1851,7 +1813,7 @@ stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
out:
return 0;
@@ -1860,9 +1822,9 @@ out:
int32_t
stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
@@ -1893,7 +1855,7 @@ stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->postparent = *postparent;
local->preparent = *preparent;
-
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
@@ -1905,13 +1867,13 @@ stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
while (trav) {
STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->dict);
+ local->umask, local->xdata);
trav = trav->next;
}
return 0;
out:
STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
@@ -1920,7 +1882,7 @@ out:
int
stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1942,26 +1904,27 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
- local->dict = dict_ref (params);
- local->mode = mode;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
loc_copy (&local->loc, loc);
frame->local = local;
/* Everytime in stripe lookup, all child nodes should be looked up */
STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, params);
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1970,11 +1933,12 @@ int32_t
stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -2001,6 +1965,16 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
if (FIRST_CHILD(this) == prev->this) {
local->inode = inode_ref (inode);
local->stbuf = *buf;
@@ -2011,6 +1985,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2036,14 +2012,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (link, frame, local->op_ret,
local->op_errno, local->inode,
&local->stbuf, &local->preparent,
- &local->postparent);
+ &local->postparent, NULL);
}
out:
return 0;
}
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2066,8 +2042,7 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2081,13 +2056,13 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
while (trav) {
STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
+ oldloc, newloc, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -2095,7 +2070,7 @@ int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2116,7 +2091,7 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
if (!callcnt) {
STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
return 0;
@@ -2127,12 +2102,11 @@ int32_t
stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
@@ -2158,12 +2132,21 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
local->op_ret = op_ret;
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2186,7 +2169,7 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
@@ -2201,29 +2184,19 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
}
- unwind:
/* Create itself has failed.. so return
without setxattring */
STRIPE_STACK_UNWIND (create, frame, local->op_ret,
local->op_errno, local->fd,
local->inode, &local->stbuf,
- &local->preparent, &local->postparent);
+ &local->preparent, &local->postparent, NULL);
}
out:
@@ -2236,7 +2209,7 @@ int32_t
stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2247,9 +2220,6 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int32_t need_unref = 0;
int32_t ret = -1;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -2296,7 +2266,7 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->call_count = 1;
STACK_WIND (frame, stripe_create_fail_unlink_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
return 0;
}
@@ -2311,9 +2281,6 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Send a setxattr request to nodes where the
files are created */
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
trav = trav->next;
while (trav) {
if (priv->xattr_supported) {
@@ -2326,27 +2293,20 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_copy (local->xattr, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
dict = local->xattr;
}
-
+
STACK_WIND (frame, stripe_create_cbk, trav->xlator,
trav->xlator->fops->create, &local->loc,
- local->flags, local->mode, local->fd,
+ local->flags, local->mode, local->umask, local->fd,
dict);
trav = trav->next;
if (need_unref && dict)
@@ -2368,7 +2328,7 @@ out:
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -2376,9 +2336,6 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
int ret = 0;
int need_unref = 0;
int i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -2400,31 +2357,27 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
local->flags = flags;
local->mode = mode;
- local->xattr = dict_copy_with_ref (params, NULL);
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
/* Send a setxattr request to nodes where the
files are created */
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
if (priv->xattr_supported) {
dict = dict_new ();
@@ -2434,30 +2387,23 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
need_unref = 1;
- dict_copy (params, dict);
+ dict_copy (xdata, dict);
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-size failed", loc->path);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set child_count failed", loc->path);
- ret = dict_set_int32 (dict, index_key, i);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: set stripe-index failed", loc->path);
+ "failed to build xattr request");
} else {
- dict = params;
+ dict = xdata;
}
STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->create, loc, flags, mode,
- fd, dict);
+ umask, fd, dict);
if (need_unref && dict)
dict_unref (dict);
@@ -2466,13 +2412,13 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
err:
STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, xdata);
return 0;
}
int32_t
stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2510,224 +2456,25 @@ stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- GF_FREE (local->fctx->xl_array);
- GF_FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
-
STRIPE_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, xdata);
}
out:
return 0;
}
-int32_t
-stripe_open_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = (call_frame_t *)cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- goto unlock;
- }
-
- if (!dict)
- goto unlock;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- local->fctx->xl_array =
- GF_CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32 (data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
-
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array) {
- if (local->fctx->xl_array[index]) {
- gf_log (this->name, GF_LOG_ERROR,
- "duplicate entry @ index (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array[index] = prev->this;
- }
- local->entry_count++;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret)
- goto err;
-
- if (local->entry_count != local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "entry-count (%d) != stripe-count (%d)",
- local->entry_count, local->fctx->stripe_count);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
- gf_log (this->name, GF_LOG_ERROR, "stripe size not set");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd, 0);
- trav = trav->next;
- }
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd);
-out:
- return 0;
-}
/**
* stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int32_t op_errno = 1;
- dict_t *dict = NULL;
- int ret = 0;
- char key[256] = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2744,8 +2491,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2761,73 +2507,25 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict)
- goto err;
-
- sprintf (key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64 (dict, key, 8);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- sprintf (key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32 (dict, key, 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
-
- while (trav) {
- STACK_WIND (frame, stripe_open_lookup_cbk,
- trav->xlator, trav->xlator->fops->lookup,
- loc, dict);
- trav = trav->next;
- }
- if (dict)
- dict_unref (dict);
-
- return 0;
- }
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
while (trav) {
STACK_WIND (frame, stripe_open_cbk, trav->xlator,
trav->xlator->fops->open,
&local->loc, local->flags, local->fd,
- wbflags);
+ xdata);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2860,7 +2558,7 @@ stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
out:
return 0;
@@ -2868,7 +2566,7 @@ out:
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
@@ -2890,8 +2588,7 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2902,19 +2599,19 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -2954,7 +2651,7 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->failed)
local->op_ret = -1;
STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
- local->op_errno, &local->lock);
+ local->op_errno, &local->lock, NULL);
}
out:
return 0;
@@ -2962,7 +2659,7 @@ out:
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
@@ -2978,8 +2675,7 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2990,20 +2686,20 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3040,14 +2736,14 @@ stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3067,8 +2763,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3079,13 +2774,13 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
while (trav) {
STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (flush, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -3094,7 +2789,7 @@ err:
int32_t
stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3130,6 +2825,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -3152,18 +2850,19 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
local->op_errno, &local->pre_buf,
- &local->post_buf);
+ &local->post_buf, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -3175,31 +2874,38 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->op_ret = -1;
+
frame->local = local;
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+ local->op_ret = -1;
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3234,6 +2940,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -3250,7 +2959,7 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ local->op_errno, &local->stbuf, NULL);
}
out:
@@ -3260,11 +2969,12 @@ out:
int32_t
stripe_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -3276,8 +2986,7 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3286,26 +2995,35 @@ stripe_fstat (call_frame_t *frame,
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3313,11 +3031,9 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3326,22 +3042,60 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
err:
- STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL);
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
@@ -3378,14 +3132,14 @@ stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
+ local->op_errno, NULL);
}
out:
return 0;
}
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -3401,8 +3155,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3413,20 +3166,20 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
while (trav) {
STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
err:
- STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno);
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
int32_t
stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
int32_t i = 0;
int32_t callcnt = 0;
@@ -3436,6 +3189,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt tmp_stbuf = {0,};
struct iobref *tmp_iobref = NULL;
struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3443,13 +3197,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
+ prev = cookie;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
- if (op_ret != -1)
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
+ }
}
UNLOCK (&frame->lock);
@@ -3478,7 +3235,8 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
vec[count].iov_len =
(local->replies[i].requested_size -
local->replies[i].op_ret);
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
if (!iobuf) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
@@ -3487,9 +3245,11 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto done;
}
memset (iobuf->ptr, 0, vec[count].iov_len);
- iobref_add (local->iobref, iobuf);
vec[count].iov_base = iobuf->ptr;
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
op_ret += vec[count].iov_len;
count++;
}
@@ -3507,11 +3267,10 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_FREE (local->replies);
tmp_iobref = local->iobref;
STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
- count, &tmp_stbuf, tmp_iobref);
+ count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (vec)
- GF_FREE (vec);
+ GF_FREE (vec);
}
out:
return 0;
@@ -3524,7 +3283,7 @@ out:
int32_t
stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
@@ -3535,8 +3294,10 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_local_t *local = NULL;
struct iovec *final_vec = NULL;
struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
struct iobref *tmp_iobref = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3545,6 +3306,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
index = local->node_index;
+ prev = cookie;
mframe = local->orig_frame;
if (!mframe)
goto out;
@@ -3564,6 +3326,9 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
mlocal->replies[index].stbuf = *stbuf;
mlocal->replies[index].count = count;
mlocal->replies[index].vector = iov_dup (vector, count);
+
+ correct_file_size(stbuf, fctx, prev);
+
if (local->stbuf_size < stbuf->ia_size)
local->stbuf_size = stbuf->ia_size;
local->stbuf_blocks += stbuf->ia_blocks;
@@ -3632,13 +3397,13 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_FREE (mlocal->replies);
tmp_iobref = mlocal->iobref;
/* work around for nfs truncated read. Bug 3774 */
- WIPE (&tmp_stbuf);
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref);
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
iobref_unref (tmp_iobref);
- if (final_vec)
- GF_FREE (final_vec);
+ GF_FREE (final_vec);
}
goto out;
@@ -3650,7 +3415,7 @@ check_size:
STACK_WIND (mframe, stripe_readv_fstat_cbk,
(fctx->xl_array[index]),
(fctx->xl_array[index])->fops->fstat,
- mlocal->fd);
+ mlocal->fd, NULL);
}
out:
@@ -3662,7 +3427,7 @@ end:
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int32_t op_errno = EINVAL;
int32_t idx = 0;
@@ -3675,6 +3440,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
@@ -3685,7 +3451,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -3693,6 +3459,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
if (!stripe_size) {
gf_log (this->name, GF_LOG_DEBUG,
"Wrong stripe size for the file");
@@ -3707,8 +3475,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rounded_end = roof (offset+size, stripe_size);
num_stripe = (rounded_end- rounded_start)/stripe_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -3716,8 +3483,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
frame->local = local;
/* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies),
- gf_stripe_mt_readv_replies);
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
@@ -3732,8 +3499,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
@@ -3747,9 +3513,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
+ fd, frame_size, dest_offset, flags, xdata);
frame_offset += frame_size;
}
@@ -3759,7 +3532,7 @@ err:
if (rframe)
STRIPE_STACK_DESTROY (rframe);
- STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL);
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -3767,11 +3540,15 @@ err:
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3780,39 +3557,82 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->post_buf = *postbuf;
- local->pre_buf = *prebuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STRIPE_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
@@ -3826,13 +3646,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -3840,22 +3666,51 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be stripped across the child nodes */
for (idx = 0; idx< count; idx ++) {
total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ total_chunks = 0;
while (1) {
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
/* Send striped chunk of the vector to child
nodes appropriately. */
idx = (((offset + offset_offset) /
@@ -3883,47 +3738,567 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
err:
- STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
-stripe_release (xlator_t *this, fd_t *fd)
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+ VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_del (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ GF_ASSERT (frame);
+
+ if (!this || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
- if (!fctx->static_array)
- GF_FREE (fctx->xl_array);
+ STRIPE_VALIDATE_FCTX (fctx, err);
- GF_FREE (fctx);
+ remaining_size = len;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_release (xlator_t *this, fd_t *fd)
+{
return 0;
}
+int
+stripe_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_fctx = 0;
+ stripe_fd_ctx_t *fctx = NULL;
+
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ goto err;
+ }
+
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+
+ if (!fctx->static_array)
+ GF_FREE (fctx->xl_array);
+
+ GF_FREE (fctx);
+err:
+ return 0;
+}
int32_t
notify (xlator_t *this, int32_t event, void *data, ...)
@@ -3931,6 +4306,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
if (!this)
return 0;
@@ -3942,30 +4318,34 @@ notify (xlator_t *this, int32_t event, void *data, ...)
switch (event)
{
case GF_EVENT_CHILD_UP:
- case GF_EVENT_CHILD_CONNECTING:
{
/* get an index number to set */
for (i = 0; i < priv->child_count; i++) {
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- if (!priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -3973,20 +4353,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- if (priv->nodes_down)
- default_notify (this, event, data);
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -3996,79 +4375,252 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if it's last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup (stripe_str);
- stripe_opt = CALLOC (1, sizeof (struct stripe_options));
- if (!stripe_opt) {
- GF_FREE (dup_str);
- goto out;
- }
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- }
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
- }
+ local = frame->local;
- if (stripe_opt->block_size < 512) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
- "%s. Should be atleast 512 bytes", num);
- goto out;
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent faliure
+ * conditions, hence we propogate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
}
- if (stripe_opt->block_size % 512) {
- gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
- " be a multiple of 512 bytes", num);
- goto out;
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
}
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
- } else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
}
ret = 0;
@@ -4076,80 +4628,220 @@ out:
return ret;
}
-int32_t
-stripe_iatt_merge (struct iatt *from, struct iatt *to)
+inline gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
}
-int32_t
-stripe_readdirp_entry_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- stripe_local_t *local = NULL;
- int32_t done = 0;
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!this || !frame || !frame->local || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
goto out;
}
- entry = cookie;
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, stripe_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, stripe_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
LOCK (&frame->lock);
{
- local->wind_count--;
- if (!local->wind_count)
+ local->call_count--;
+ if (!local->call_count)
done = 1;
if (op_ret == -1) {
local->op_errno = op_errno;
local->op_ret = op_ret;
goto unlock;
}
- stripe_iatt_merge (buf, &entry->d_stat);
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
}
unlock:
UNLOCK(&frame->lock);
if (done) {
- frame->local = NULL;
- STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
- gf_dirent_free (&local->entries);
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
}
-out:
- return 0;
+ return 0;
}
int32_t
stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries)
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
{
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
gf_dirent_t *local_entry = NULL;
- int32_t ret = -1;
gf_dirent_t *tmp_entry = NULL;
xlator_list_t *trav = NULL;
loc_t loc = {0, };
- inode_t *inode = NULL;
- char *path;
int32_t count = 0;
stripe_private_t *priv = NULL;
int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -4175,8 +4867,9 @@ stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
list_splice_init (&orig_entries->list,
&local->entries.list);
- local->wind_count = op_ret * subvols;
+ local->wind_count = op_ret;
}
+
}
unlock:
UNLOCK (&frame->lock);
@@ -4184,76 +4877,85 @@ unlock:
if (op_ret == -1)
goto out;
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
count = op_ret;
- ret = 0;
list_for_each_entry_safe (local_entry, tmp_entry,
(&local->entries.list), list) {
if (!local_entry)
break;
- if (!IA_ISREG (local_entry->d_stat.ia_type)) {
+ if (!IA_ISREG (local_entry->d_stat.ia_type) || !local_entry->inode) {
LOCK (&frame->lock);
{
- local->wind_count -= subvols;
+ local->wind_count--;
count = local->wind_count;
}
UNLOCK (&frame->lock);
continue;
}
- inode = inode_new (local->fd->inode->table);
- if (!inode)
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
goto out;
+ }
- loc.inode = inode;
- loc.parent = local->fd->inode;
- ret = inode_path (local->fd->inode, local_entry->d_name, &path);
- if (ret != -1) {
- loc.path = path;
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret != -1) {
- loc.path = path;
- } else {
- goto out;
- }
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
}
- loc.name = strrchr (loc.path, '/');
- loc.name++;
+ loc.inode = inode_ref (local_entry->inode);
+
+ uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
trav = this->children;
while (trav) {
- STACK_WIND_COOKIE (frame, stripe_readdirp_entry_stat_cbk,
- local_entry, trav->xlator,
- trav->xlator->fops->stat, &loc);
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
trav = trav->next;
}
- inode_unref (loc.inode);
+ loc_wipe (&loc);
}
out:
if (!count) {
/* all entries are directories */
frame->local = NULL;
STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries);
+ local->op_errno, &local->entries, NULL);
gf_dirent_free (&local->entries);
stripe_local_wipe (local);
- GF_FREE (local);
+ mem_put (local);
}
-
+ if (xattrs)
+ dict_unref (xattrs);
return 0;
}
int32_t
stripe_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
int op_errno = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
@@ -4267,8 +4969,7 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
}
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -4288,15 +4989,16 @@ stripe_readdirp (call_frame_t *frame, xlator_t *this,
goto err;
STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off);
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
return 0;
err:
op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -4317,21 +5019,86 @@ out:
return ret;
}
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+ struct stripe_options *prev = NULL;
+ struct stripe_options *trav = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+
+ trav = priv->pattern;
+ priv->pattern = NULL;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+
+
+}
+
int
reconfigure (xlator_t *this, dict_t *options)
{
- stripe_private_t *priv = NULL;
- int ret = -1;
+ stripe_private_t *priv = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
- priv = this->private;
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
+
+ priv = this->private;
- GF_OPTION_RECONF ("block-size", priv->block_size, options, size, out);
ret = 0;
-out:
- return ret;
+ LOCK (&priv->lock);
+ {
+ ret = clear_pattern_list (priv);
+ if (ret)
+ goto unlock;
+
+ data = dict_get (options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
+ } else {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
+ }
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+
+ ret = 0;
+ out:
+ return ret;
}
@@ -4344,6 +5111,7 @@ int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
int32_t count = 0;
@@ -4387,9 +5155,9 @@ init (xlator_t *this)
if (!priv->xl_array)
goto out;
- priv->state = GF_CALLOC (count, sizeof (int8_t),
- gf_stripe_mt_int8_t);
- if (!priv->state)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -4409,30 +5177,56 @@ init (xlator_t *this)
goto out;
}
-
- GF_OPTION_INIT ("block-size", priv->block_size, size, out);
-
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
+ }
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
-
/* notify related */
priv->nodes_down = priv->child_count;
- this->private = priv;
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = priv;
ret = 0;
out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
GF_FREE (priv);
}
}
@@ -4456,15 +5250,15 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
this->private = NULL;
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
prev = trav;
trav = trav->next;
- FREE (prev);
+ GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
GF_FREE (priv);
}
@@ -4475,17 +5269,50 @@ out:
int32_t
stripe_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+
+ char size_key[256] = {0,};
+ char index_key[256] = {0,};
+ char count_key[256] = {0,};
+ char coalesce_key[256] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ if (!xattr || (op_ret == -1))
+ goto out;
+
+ sprintf (size_key, "trusted.%s.stripe-size", this->name);
+ sprintf (count_key, "trusted.%s.stripe-count", this->name);
+ sprintf (index_key, "trusted.%s.stripe-index", this->name);
+ sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name);
+
+ dict_del (xattr, size_key);
+ dict_del (xattr, count_key);
+ dict_del (xattr, index_key);
+ dict_del (xattr, coalesce_key);
+
+out:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+
+}
int
stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
int call_cnt = 0;
stripe_local_t *local = NULL;
@@ -4515,92 +5342,39 @@ stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
if (!call_cnt) {
STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr);
+ local->xattr, xdata);
}
return 0;
}
int32_t
-stripe_pathinfo_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->pathinfo_len;
-
- if (len && xattr && xattr->pathinfo) {
- memcpy (buffer, xattr->pathinfo, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
- out:
- return ret;
-}
-
-int32_t
-stripe_free_pathinfo_str (stripe_local_t *local)
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->pathinfo)
- GF_FREE (xattr->pathinfo);
- }
-
- ret = 0;
- out:
- return ret;
-}
-
-int32_t
-stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict) {
stripe_local_t *local = NULL;
int32_t callcnt = 0;
int32_t ret = -1;
long cky = 0;
- char *pathinfo = NULL;
- char *pathinfo_serz = NULL;
- int32_t padding = 0;
- int32_t tlen = 0;
- char stripe_size_str[20] = {0,};
+ void *xattr_val = NULL;
+ void *xattr_serz = NULL;
stripe_xattr_sort_t *xattr = NULL;
dict_t *stripe_xattr = NULL;
if (!frame || !frame->local || !this) {
- gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
return ret;
}
local = frame->local;
cky = (long) cookie;
+ if (local->xsel[0] == '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+ return ret;
+ }
+
LOCK (&frame->lock);
{
callcnt = --local->wind_count;
@@ -4609,23 +5383,26 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
goto out;
if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *) GF_CALLOC (local->nallocs,
- sizeof (stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
+ local->xattr_list = (stripe_xattr_sort_t *)
+ GF_CALLOC (local->nallocs,
+ sizeof (stripe_xattr_sort_t),
+ gf_stripe_mt_xattr_sort_t);
if (local->xattr_list) {
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret)
- goto out;
-
xattr = local->xattr_list + (int32_t) cky;
- pathinfo = gf_strdup (pathinfo);
+ ret = dict_get_ptr_and_len (dict, local->xsel,
+ &xattr_val,
+ &xattr->xattr_len);
+ if (xattr->xattr_len == 0)
+ goto out;
+
xattr->pos = cky;
- xattr->pathinfo = pathinfo;
- xattr->pathinfo_len = strlen (pathinfo);
+ xattr->xattr_value = gf_memdup (xattr_val,
+ xattr->xattr_len);
- local->xattr_total_len += strlen (pathinfo) + 1;
+ if (xattr->xattr_value != NULL)
+ local->xattr_total_len += xattr->xattr_len + 1;
}
}
out:
@@ -4639,41 +5416,36 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
if (!stripe_xattr)
goto unwind;
- snprintf (stripe_size_str, 20, "%ld", local->stripe_size);
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
- + strlen (stripe_size_str) + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
- gf_common_mt_char);
- if (!pathinfo_serz)
- goto unwind;
-
- /* xlator info */
- sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ", this->name, stripe_size_str);
-
- ret = stripe_pathinfo_aggregate (pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
+ /* select filler based on ->xsel */
+ if (XATTR_IS_PATHINFO (local->xsel))
+ ret = stripe_fill_pathinfo_xattr (this, local,
+ (char **)&xattr_serz);
+ else if (XATTR_IS_LOCKINFO (local->xsel)) {
+ ret = stripe_fill_lockinfo_xattr (this, local,
+ &xattr_serz);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown xattr in xattr request");
goto unwind;
}
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- ret = dict_set_dynstr (stripe_xattr, GF_XATTR_PATHINFO_KEY, pathinfo_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo key in dict");
+ if (!ret) {
+ ret = dict_set_dynptr (stripe_xattr, local->xsel,
+ xattr_serz,
+ local->xattr_total_len);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Can't set %s key in dict",
+ local->xsel);
+ }
unwind:
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, stripe_xattr);
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+ stripe_xattr, NULL);
- ret = stripe_free_pathinfo_str (local);
+ ret = stripe_free_xattr_str (local);
- if (local->xattr_list)
- GF_FREE (local->xattr_list);
+ GF_FREE (local->xattr_list);
if (stripe_xattr)
dict_unref (stripe_xattr);
@@ -4684,14 +5456,15 @@ stripe_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
int32_t
stripe_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- xlator_t **sub_volumes;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int i = 0;
+ xlator_t **sub_volumes;
+ int ret = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -4703,8 +5476,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = this->children;
/* Initialization */
- local = GF_CALLOC (1, sizeof (stripe_local_t),
- gf_stripe_mt_stripe_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -4715,7 +5487,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
sub_volumes = alloca ( priv->child_count *
@@ -4730,7 +5502,8 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (cluster_getmarkerattr (frame, this, loc, name,
local, stripe_getxattr_unwind,
sub_volumes, priv->child_count,
- MARKER_UUID_TYPE, priv->vol_uuid)) {
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
@@ -4746,25 +5519,37 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
trav = trav->next) {
STACK_WIND (frame, stripe_getxattr_cbk,
trav->xlator, trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
}
- if (name && (strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)) {
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ if (name && (XATTR_IS_PATHINFO (name))) {
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = inode_ctx_get (loc->inode, this,
+ (uint64_t *) &local->fctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe size unavailable from fctx"
+ " relying on pathinfo could lead to"
+ " wrong results");
+ }
+
local->nallocs = local->wind_count = priv->child_count;
+ (void) strncpy (local->xsel, name, strlen (name));
+ /**
+ * for xattrs that need info from all childs, fill ->xsel
+ * as above and call the filler function in cbk based on
+ * it
+ */
for (i = 0, trav = this->children; i < priv->child_count; i++,
trav = trav->next) {
- STACK_WIND_COOKIE (frame, stripe_getxattr_pathinfo_cbk,
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
(void *) (long) i, trav->xlator,
trav->xlator->fops->getxattr,
- loc, name);
+ loc, name, xdata);
}
return 0;
@@ -4772,42 +5557,124 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this,
if (name &&(*priv->vol_uuid)) {
if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
- sub_volumes = alloca ( priv->child_count *
- sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ if (!IA_FILE_OR_DIR (loc->inode->ia_type))
+ local->marker.call_count = 1;
+ else
+ local->marker.call_count = priv->child_count;
+ sub_volumes = alloca (local->marker.call_count *
+ sizeof (xlator_t *));
+
+ for (i = 0, trav = this->children;
+ i < local->marker.call_count;
+ i++, trav = trav->next) {
*(sub_volumes + i) = trav->xlator;
}
if (cluster_getmarkerattr (frame, this, loc, name,
- local, stripe_getxattr_unwind,
+ local,
+ stripe_getxattr_unwind,
sub_volumes,
- priv->child_count,
+ local->marker.call_count,
MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
priv->vol_uuid)) {
op_errno = EINVAL;
goto err;
}
+
return 0;
}
}
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
err:
- STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL);
+ STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
+inline gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (!name) {
+ goto out;
+ }
+
+ if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))
+ || XATTR_IS_PATHINFO (name))
+ is_spl = _gf_true;
+out:
+ return is_spl;
+}
+
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t ret = -1, op_errno = 0;
+ int i = 0;
+ xlator_list_t *trav = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = -1;
+ frame->local = local;
+
+ strncpy (local->xsel, name, strlen (name));
+ local->nallocs = local->wind_count = priv->child_count;
+
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->fgetxattr,
+ fd, name, xdata);
+ }
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ return ret;
+}
+
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ if (stripe_is_special_xattr (name)) {
+ stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+
+out:
+ return 0;
+}
+
+
+
int32_t
stripe_priv_dump (xlator_t *this)
{
@@ -4856,36 +5723,44 @@ out:
}
struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
-
- .getxattr = stripe_getxattr,
- .readdirp = stripe_readdirp,
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
};
struct xlator_cbks cbks = {
.release = stripe_release,
+ .forget = stripe_forget,
};
struct xlator_dumpops dumpops = {
@@ -4894,8 +5769,9 @@ struct xlator_dumpops dumpops = {
struct volume_options options[] = {
{ .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY,
+ .type = GF_OPTION_TYPE_SIZE_LIST,
.default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
.description = "Size of the stripe unit that would be read "
"from or written to the striped servers."
},
@@ -4903,5 +5779,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 3ab67d621..5673d18f3 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -38,36 +29,53 @@
#include <signal.h>
#define STRIPE_PATHINFO_HEADER "STRIPE:"
-
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ stripe_local_wipe(__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_STACK_DESTROY(frame) do { \
+ stripe_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
if (__local) { \
- stripe_local_wipe(__local); \
- GF_FREE (__local); \
+ stripe_local_wipe (__local); \
+ mem_put (__local); \
} \
} while (0)
-#define STRIPE_STACK_DESTROY(frame) do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- stripe_local_wipe (__local); \
- GF_FREE (__local); \
- } \
- } while (0)
+#define STRIPE_VALIDATE_FCTX(fctx, label) do { \
+ int idx = 0; \
+ if (!fctx) { \
+ op_errno = EINVAL; \
+ goto label; \
+ } \
+ for (idx = 0; idx < fctx->stripe_count; idx++) { \
+ if (!fctx->xl_array[idx]) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "fctx->xl_array[%d] is NULL", \
+ idx); \
+ op_errno = ESTALE; \
+ goto label; \
+ } \
+ } \
+ } while (0)
typedef struct stripe_xattr_sort {
- int32_t pos;
- int32_t pathinfo_len;
- char *pathinfo;
+ int pos;
+ int xattr_len;
+ char *xattr_value;
} stripe_xattr_sort_t;
/**
@@ -90,16 +98,17 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
@@ -111,6 +120,7 @@ struct readv_replies {
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -146,7 +156,7 @@ struct stripe_local {
blkcnt_t preparent_blocks;
blkcnt_t postparent_blocks;
- struct readv_replies *replies;
+ struct stripe_replies *replies;
struct statvfs statvfs_buf;
dir_entry_t *entry;
@@ -173,11 +183,12 @@ struct stripe_local {
mode_t mode;
dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
stripe_xattr_sort_t *xattr_list;
int32_t xattr_total_len;
int32_t nallocs;
+ char xsel[256];
struct marker_str marker;
@@ -194,12 +205,84 @@ struct stripe_local {
void *value;
struct iobref *iobref;
gf_dirent_t entries;
+ gf_dirent_t *dirent;
dict_t *xattr;
uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Determine the stripe index of a particular frame based on the translator.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int32_t i, idx = -1;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (fctx->xl_array[i] == prev->this) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+ int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dst[i] = src[i];
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int index;
+
+ if (!IA_ISREG(buf->ia_type))
+ return;
+
+ if (!fctx || !fctx->stripe_coalesce)
+ return;
+
+ index = stripe_get_frame_index(fctx, prev);
+ buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+ fctx->stripe_count, index);
+}
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index 2a1fe8372..000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-mem-types.h b/xlators/cluster/unify/src/unify-mem-types.h
deleted file mode 100644
index 13c9cc1f7..000000000
--- a/xlators/cluster/unify/src/unify-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __UNIFY_MEM_TYPES_H__
-#define __UNIFY_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_unify_mem_types_ {
- gf_unify_mt_char = gf_common_mt_end + 1,
- gf_unify_mt_int16_t,
- gf_unify_mt_xlator_t,
- gf_unify_mt_unify_private_t,
- gf_unify_mt_xlator_list_t,
- gf_unify_mt_dir_entry_t,
- gf_unify_mt_off_t,
- gf_unify_mt_int,
- gf_unify_mt_unify_self_heal_struct,
- gf_unify_mt_unify_local_t,
- gf_unify_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index f99e4c7c3..000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- GF_FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- GF_FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- GF_FREE (local->sh_struct->count_list);
-
- GF_FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict,
- &local->oldpostparent);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index 6dc93083d..000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4589 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'ia_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!IA_ISDIR (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = buf->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (buf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!IA_ISDIR (buf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.ia_type && local->stbuf.ia_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (IA_ISDIR (local->stbuf.ia_type) &&
- !IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!IA_ISDIR (local->stbuf.ia_type) &&
- IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !IA_ISDIR (buf->ia_type)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = GF_CALLOC (1, 2 * (priv->child_count + 2),
- gf_unify_mt_int16_t);
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && IA_ISDIR (buf->ia_type)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->ia_ino = buf->ia_ino;
- if (IA_ISDIR (buf->ia_type) ||
- !(local->stbuf.ia_blksize)) {
- local->stbuf = *buf;
- local->oldpostparent = *postparent;
- }
- } else if (!IA_ISDIR (buf->ia_type)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->ia_nlink < buf->ia_nlink) {
- local->ia_nlink = buf->ia_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.ia_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !IA_ISDIR (local->stbuf.ia_type)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = GF_CALLOC (1, 2 * (local->index + 1),
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- GF_FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (IA_ISDIR(local->loc1.inode->ia_type)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.ia_ino = local->ia_ino;
- }
-
- local->stbuf.ia_nlink = local->ia_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- GF_FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- IA_ISDIR(local->stbuf.ia_type))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && IA_ISDIR (loc->inode->ia_type)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->ia_type &&
- !IA_ISDIR (loc->inode->ia_type)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->ia_type) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->ia_ino = loc->inode->ino;
- if (IA_ISDIR (loc->inode->ia_type)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (IA_ISDIR (buf->ia_type))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, local->wbflags);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = gf_strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = gf_strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = GF_CALLOC (1, ZR_PATH_MAX, gf_unify_mt_char);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- GF_FREE (tmp_str);
- }
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- local->wbflags = wbflags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (IA_ISLNK (loc->inode->ia_type)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd, wbflags);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->ia_ino = buf->ia_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL,
- NULL, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, 0);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.ia_ino = local->ia_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-int32_t
-unify_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = statpost->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (statpost->ia_type) ||
- !local->stpost.ia_blksize) {
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
-
- if ((!IA_ISDIR (statpost->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stpre.ia_ino = local->ia_ino;
- local->stpost.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stpre, &local->stpost);
- }
-
- return 0;
-}
-
-
-int32_t
-unify_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- if (!(loc && loc->inode)) {
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setattr_cbk,
- NS (this),
- NS (this)->fops->setattr,
- loc, stbuf, valid);
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_setattr_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->setattr,
- loc, stbuf, valid);
-
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_setattr_cbk, child,
- child->fops->fsetattr, fd, stbuf, valid);
-
- STACK_WIND (frame, unify_setattr_cbk, NS(this),
- NS(this)->fops->fsetattr, fd, stbuf, valid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_setattr_cbk,
- NS(this), NS(this)->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->ia_ino = postbuf->ia_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (IA_ISDIR (postbuf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
-
- if ((!IA_ISDIR (postbuf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->poststbuf.ia_ino = local->ia_ino;
- } else {
- local->op_ret = -1;
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->poststbuf);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->ia_ino = loc->inode->ino;
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_truncate_cbk,
- NS(this),
- NS(this)->fops->truncate,
- loc,
- 0);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send offset to NS truncate */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->truncate, loc, 0);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- if (((call_frame_t *)cookie)->this == NS(this)) {
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- unify_local_t *local = NULL;
-
- local = frame->local;
-
- local->stbuf = *prebuf;
- local->stbuf.ia_ino = local->ia_ino;
-
- local->poststbuf = *postbuf;
- local->poststbuf.ia_ino = local->ia_ino;
-
- STACK_UNWIND (frame, op_ret, op_errno,
- &local->stbuf, &local->poststbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
- unify_local_t *local = NULL;
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->ftruncate,
- fd, 0);
-
- return 0;
-}
-
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdirp_cbk, NS(this),
- NS(this)->fops->readdirp, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = gf_strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Start the mapping list */
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = gf_strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!IA_ISDIR (buf->ia_type))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- if (IA_ISDIR (local->loc1.inode->ia_type)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->oldpreparent,
- &local->oldpostparent, &local->newpreparent,
- &local->newpostparent);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = GF_CALLOC (1, index * 2,
- gf_unify_mt_int16_t);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- GF_FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- GF_FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent,
- &local->newpreparent, &local->newpostparent);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
- }
-
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preoldparent;
- local->oldpostparent = *postoldparent;
- local->newpreparent = *prenewparent;
- local->newpostparent = *postnewparent;
-
- /* Everything is fine. */
- if (IA_ISDIR (buf->ia_type)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct gf_flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct gf_flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->ia_type && (!IA_ISDIR(inode->ia_type))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- GF_FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_unify_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_unify_mt_unify_private_t);
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = GF_CALLOC (1,
- sizeof (xlator_t) * (count + 1),
- gf_unify_mt_xlator_t);
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- ret = _private->sched_ops->mem_acct_init (this);
-
- if (ret == -1) {
- return -1;
- }
-
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- GF_FREE (_private);
- return -1;
- }
-
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = GF_CALLOC (1, sizeof (*xlparent),
- gf_unify_mt_xlator_list_t);
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .readdirp = unify_readdirp,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop,
- .setattr = unify_setattr,
- .fsetattr = unify_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index dbd5e44a2..000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-#include "unify-mem-types.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = GF_CALLOC (1, sizeof (unify_local_t), gf_unify_mt_unify_local_t); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct iatt stbuf;
- struct iatt stpre;
- struct iatt stpost;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t ia_ino;
- nlink_t ia_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-
- struct iatt poststbuf;
- /* When not used for rename, old*
- * are used as the attrs for the current
- * parent directory.
- */
- struct iatt oldpreparent;
- struct iatt oldpostparent;
- struct iatt newpreparent;
- struct iatt newpostparent;
- int32_t wbflags;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index df9080358..5075c59a8 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,15 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module -avoid-version
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = error-gen.h error-gen-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index b643dc5f7..f02280535 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 6d6c5f24a..ec0874b35 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -24,6 +14,7 @@
#include "xlator.h"
#include "error-gen.h"
+#include "statedump.h"
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = { .error_no_count = 4,
@@ -91,9 +82,10 @@ sys_error_t error_no_list[] = {
[GF_FOP_READ] = { .error_no_count = 5,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
ENAMETOOLONG}},
- [GF_FOP_WRITE] = { .error_no_count = 5,
+ [GF_FOP_WRITE] = { .error_no_count = 7,
.error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
[GF_FOP_STATFS] = {.error_no_count = 10,
.error_no = {EACCES,EBADF,EFAULT,EINTR,
EIO,ENAMETOOLONG,ENOENT,
@@ -113,6 +105,15 @@ sys_error_t error_no_list[] = {
[GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
.error_no = {EACCES,EBADF,ENAMETOOLONG,
EINTR}},
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
[GF_FOP_OPENDIR] = { .error_no_count = 8,
.error_no = {EACCES,EEXIST,EFAULT,
EISDIR,EMFILE,
@@ -237,6 +238,8 @@ conv_errno_to_int (char **error_no)
return EINTR;
else if (!strcmp ((*error_no), "EFBIG"))
return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
else
return EAGAIN;
}
@@ -286,6 +289,12 @@ get_fop_int (char **op_no_str)
return GF_FOP_GETXATTR;
else if (!strcmp ((*op_no_str), "removexattr"))
return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
else if (!strcmp ((*op_no_str), "opendir"))
return GF_FOP_OPENDIR;
else if (!strcmp ((*op_no_str), "readdir"))
@@ -362,7 +371,8 @@ error_gen (xlator_t *this, int op_no)
rand_no = 0;
ret = error_no_list[op_no].error_no[rand_no];
}
- egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
+ if (egp->random_failure == _gf_true)
+ egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
}
return ret;
}
@@ -371,17 +381,17 @@ error_gen (xlator_t *this, int op_no)
int
error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
- buf, dict, postparent);
- return 0;
+ buf, xdata, postparent);
+ return 0;
}
int
error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -396,36 +406,28 @@ error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ NULL);
+ return 0;
}
STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
- return 0;
-}
-
-
-int
-error_gen_forget (xlator_t *this, inode_t *inode)
-{
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -439,32 +441,31 @@ error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
-
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
int
error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -478,21 +479,21 @@ error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -506,32 +507,32 @@ error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
int
error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -546,32 +547,32 @@ error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
int
error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
- return 0;
+ prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp =NULL;
@@ -586,31 +587,30 @@ error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
int
error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -624,31 +624,31 @@ error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (access, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
int
error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
+ const char *path, struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
int
error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -662,15 +662,15 @@ error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
@@ -678,18 +678,18 @@ int
error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, dict_t *params)
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -704,15 +704,15 @@ error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, params);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
@@ -720,17 +720,17 @@ int
error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -745,31 +745,32 @@ error_gen_mkdir (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
int
error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -783,31 +784,34 @@ error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
int
error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
-error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -821,15 +825,15 @@ error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
@@ -837,17 +841,17 @@ int
error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -862,15 +866,15 @@ error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
@@ -878,18 +882,19 @@ int
error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
+ prenewparent, postnewparent, xdata);
+ return 0;
}
int
error_gen_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -904,15 +909,15 @@ error_gen_rename (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -920,17 +925,17 @@ int
error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -945,15 +950,15 @@ error_gen_link (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL); /* pre & post parent attr */
+ NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
@@ -961,17 +966,18 @@ int
error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
+ preparent, postparent, xdata);
+ return 0;
}
int
error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -986,30 +992,30 @@ error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL); /* pre & post attr */
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1023,15 +1029,15 @@ error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
@@ -1039,17 +1045,17 @@ int
error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
- return 0;
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
int
error_gen_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1064,33 +1070,33 @@ error_gen_readv (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
- NULL, NULL);
- return 0;
+ NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t off, struct iobref *iobref)
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1102,31 +1108,47 @@ error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (enable)
op_errno = error_gen (this, GF_FOP_WRITE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
int
error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1140,15 +1162,15 @@ error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
@@ -1156,15 +1178,15 @@ int
error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
int
-error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1178,29 +1200,29 @@ error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1214,29 +1236,29 @@ error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
int
error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int
-error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1250,29 +1272,29 @@ error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
int
error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1286,30 +1308,29 @@ error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
int
error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
-
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1323,31 +1344,30 @@ error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
int
error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1361,30 +1381,30 @@ error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
- return 0;
+ loc, dict, flags, xdata);
+ return 0;
}
int
error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1398,31 +1418,103 @@ error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- return 0;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1436,31 +1528,30 @@ error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
-
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1474,31 +1565,30 @@ error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+ fd, flags, dict, xdata);
+ return 0;
}
int
error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
-
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1512,30 +1602,66 @@ error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
- return 0;
+ loc, name, xdata);
+ return 0;
+}
+
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
int
error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int
error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1549,32 +1675,31 @@ error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
- return 0;
+ fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1588,32 +1713,31 @@ error_gen_inodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1627,32 +1751,31 @@ error_gen_finodelk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1666,32 +1789,31 @@ error_gen_entrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
int
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1705,15 +1827,15 @@ error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
@@ -1725,8 +1847,7 @@ error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data)
{
STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
-
- return 0;
+ return 0;
}
@@ -1760,16 +1881,17 @@ error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int
error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off)
+ size_t size, off_t off, dict_t *xdata)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1783,30 +1905,31 @@ error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
+ fd, size, off, xdata);
return 0;
}
int
error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
int op_errno = 0;
eg_t *egp = NULL;
@@ -1820,29 +1943,94 @@ error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
+ fd, size, off, dict);
return 0;
}
-
-int
-error_gen_closedir (xlator_t *this, fd_t *fd)
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
{
- return 0;
+ GF_ASSERT (pvt);
+
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
}
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
-int
-error_gen_close (xlator_t *this, fd_t *fd)
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
+}
+
+int32_t
+error_gen_priv_dump (xlator_t *this)
{
- return 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
int32_t
@@ -1865,17 +2053,43 @@ mem_acct_init (xlator_t *this)
}
int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
+}
+
+int
init (xlator_t *this)
{
eg_t *pvt = NULL;
- data_t *error_no = NULL;
- data_t *failure_percent = NULL;
- data_t *enable = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
- char *op_no_str = NULL;
- int op_no = -1;
- int i = 0;
int32_t failure_percent_int = 0;
if (!this->children || this->children->next) {
@@ -1890,74 +2104,34 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- error_no = dict_get (this->options, "error-no");
- failure_percent = dict_get (this->options, "failure");
- enable = dict_get (this->options, "enable");
-
pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
if (!pvt) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory.");
ret = -1;
goto out;
}
LOCK_INIT (&pvt->lock);
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 0;
- if (!error_no) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error-no not specified.");
- } else {
- pvt->error_no = data_to_str (error_no);
- }
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
- if (!failure_percent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failure percent not specified.");
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- } else {
- failure_percent_int = data_to_int32 (failure_percent);
- if (failure_percent_int)
- pvt->failure_iter_no = 100/failure_percent_int;
- else
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- }
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
+
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
+
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
- if (!enable) {
- gf_log (this->name, GF_LOG_WARNING,
- "All fops are enabled.");
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 1;
- } else {
- error_enable_fops = data_to_str (enable);
- op_no_str = error_enable_fops;
- while ((*error_enable_fops) != '\0') {
- error_enable_fops++;
- if (((*error_enable_fops) == ',') ||
- ((*error_enable_fops) == '\0')) {
- if ((*error_enable_fops) != '\0') {
- (*error_enable_fops) = '\0';
- error_enable_fops++;
- }
- op_no = get_fop_int (&op_no_str);
- if (op_no == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Wrong option value %s",
- op_no_str);
- } else
- pvt->enable[op_no] = 1;
- op_no_str = error_enable_fops;
- }
- }
- }
this->private = pvt;
/* Give some seed value here */
srand (time(NULL));
out:
+ if (ret)
+ GF_FREE (pvt);
return ret;
}
@@ -1979,6 +2153,12 @@ fini (xlator_t *this)
return;
}
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+struct xlator_fops cbks;
+
struct xlator_fops fops = {
.lookup = error_gen_lookup,
.stat = error_gen_stat,
@@ -2001,6 +2181,9 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
.readdirp = error_gen_readdirp,
@@ -2021,22 +2204,29 @@ struct xlator_fops fops = {
.getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
-};
-
struct volume_options options[] = {
{ .key = {"failure"},
- .type = GF_OPTION_TYPE_INT },
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
{ .key = {"error-no"},
.value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
"EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
"EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
"ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
- "EFBIG","EAGAIN"},
- .type = GF_OPTION_TYPE_STR },
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
{ .key = {"enable"},
- .type = GF_OPTION_TYPE_STR },
+ .type = GF_OPTION_TYPE_STR,
+ },
+
{ .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index bd92aad6a..d92c23062 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _ERROR_GEN_H
#define _ERROR_GEN_H
@@ -29,11 +19,24 @@
#define GF_FAILURE_DEFAULT 10
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
+};
+
typedef struct {
int enable[GF_FOP_MAXVALUE];
int op_count;
int failure_iter_no;
char *error_no;
+ gf_boolean_t random_failure;
gf_lock_t lock;
} eg_t;
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index b894e79c3..dff294cd8 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,14 +2,17 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module -avoid-version
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = io-stats-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index 2063f6d6a..c30dfb17e 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -1,24 +1,13 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 2786109b9..9e48a7c6e 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -46,6 +36,7 @@
#include <stdarg.h>
#include "defaults.h"
#include "logging.h"
+#include "cli1-xdr.h"
#define MAX_LIST_MEMBERS 100
@@ -58,7 +49,7 @@ typedef enum {
IOS_STATS_TYPE_READDIRP,
IOS_STATS_TYPE_READ_THROUGHPUT,
IOS_STATS_TYPE_WRITE_THROUGHPUT,
- IOS_STATS_TYPE_MAX,
+ IOS_STATS_TYPE_MAX
}ios_stats_type_t;
typedef enum {
@@ -293,43 +284,43 @@ is_fop_latency_started (call_frame_t *frame)
} \
UNLOCK (&iosstat->lock); \
ios_stat_add_to_list (&conf->list[type], \
- value, iosstat); \
+ value, iosstat); \
\
} while (0)
#define BUMP_THROUGHPUT(iosstat, type) \
do { \
- struct ios_conf *conf = NULL; \
- double elapsed; \
- struct timeval *begin, *end; \
- double throughput; \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
int flag = 0; \
- \
- begin = &frame->begin; \
- end = &frame->end; \
- \
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
- + (end->tv_usec - begin->tv_usec); \
- throughput = op_ret / elapsed; \
- \
- conf = this->private; \
- LOCK(&iosstat->lock); \
- { \
- if (iosstat->thru_counters[type].throughput \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
<= throughput) { \
- iosstat->thru_counters[type].throughput = \
+ iosstat->thru_counters[type].throughput = \
throughput; \
- gettimeofday (&iosstat-> \
+ gettimeofday (&iosstat-> \
thru_counters[type].time, NULL); \
flag = 1; \
} \
} \
- UNLOCK (&iosstat->lock); \
+ UNLOCK (&iosstat->lock); \
if (flag) \
ios_stat_add_to_list (&conf->thru_list[type], \
throughput, iosstat); \
- } while (0)
+ } while (0)
int
ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
@@ -484,12 +475,12 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
new = GF_CALLOC (1, sizeof (*new),
gf_io_stats_mt_ios_stat_list);
new->iosstat = iosstat;
- new->value = value;
+ new->value = value;
ios_stat_ref (iosstat);
- list_add_tail (&new->list, &tmp->list);
+ list_add_tail (&new->list, &tmp->list);
stat = last->iosstat;
last->iosstat = NULL;
- ios_stat_unref (stat);
+ ios_stat_unref (stat);
list_del (&last->list);
GF_FREE (last);
if (reposition == MAX_LIST_MEMBERS)
@@ -511,7 +502,7 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
list_head->members++;
if (list_head->min_cnt > value)
list_head->min_cnt = value;
- }
+ }
}
out:
UNLOCK (&list_head->lock);
@@ -568,19 +559,16 @@ ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
FILE* logfp, ios_stats_type_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {0, };
- struct tm *tm = NULL;
+ struct timeval time = {0, };
char timestr[256] = {0, };
LOCK (&list_head->lock);
{
list_for_each_entry (entry, &list_head->iosstats->list, list) {
- time = entry->iosstat->thru_counters[type].time;
- tm = localtime (&time.tv_sec);
- if (!tm)
- continue;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ entry->iosstat->thru_counters[type].time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time.tv_usec);
ios_log (this, logfp, "%s \t %-10.2f \t %s",
@@ -600,7 +588,6 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- struct tm *tm = NULL;
char timestr[256] = {0, };
char str_header[128] = {0};
char str_read[128] = {0};
@@ -694,9 +681,10 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
if (interval == -1) {
LOCK (&conf->lock);
{
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS,
conf->cumulative.max_openfd_time.tv_usec);
ios_log (this, logfp, "Current open fd's: %"PRId64
@@ -735,13 +723,13 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
"\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
ios_log (this, logfp, "\n======Write Throughput File Stats======");
ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
"\tFILE NAME");
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
- ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
}
return 0;
}
@@ -929,8 +917,19 @@ ios_dump_args_init (struct ios_dump_args *args, ios_dump_type_t type,
return ret;
}
+static void
+ios_global_stats_clear (struct ios_global_stats *stats, struct timeval *now)
+{
+ GF_ASSERT (stats);
+ GF_ASSERT (now);
+
+ memset (stats, 0, sizeof (*stats));
+ stats->started_at = *now;
+}
+
int
-io_stats_dump (xlator_t *this, struct ios_dump_args *args)
+io_stats_dump (xlator_t *this, struct ios_dump_args *args,
+ gf1_cli_info_op op, gf_boolean_t is_peek)
{
struct ios_conf *conf = NULL;
struct ios_global_stats cumulative = {0, };
@@ -948,18 +947,32 @@ io_stats_dump (xlator_t *this, struct ios_dump_args *args)
gettimeofday (&now, NULL);
LOCK (&conf->lock);
{
- cumulative = conf->cumulative;
- incremental = conf->incremental;
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_CUMULATIVE)
+ cumulative = conf->cumulative;
- increment = conf->increment++;
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_INCREMENTAL) {
+ incremental = conf->incremental;
+ increment = conf->increment;
- memset (&conf->incremental, 0, sizeof (conf->incremental));
- conf->incremental.started_at = now;
+ if (!is_peek) {
+ increment = conf->increment++;
+
+ ios_global_stats_clear (&conf->incremental,
+ &now);
+ }
+ }
}
UNLOCK (&conf->lock);
- io_stats_dump_global (this, &cumulative, &now, -1, args);
- io_stats_dump_global (this, &incremental, &now, increment, args);
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_CUMULATIVE)
+ io_stats_dump_global (this, &cumulative, &now, -1, args);
+
+ if (op == GF_CLI_INFO_ALL ||
+ op == GF_CLI_INFO_INCREMENTAL)
+ io_stats_dump_global (this, &incremental, &now, increment, args);
return 0;
}
@@ -1080,36 +1093,45 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- struct tm *tm = NULL;
char timestr[256] = {0, };
+ char *dict_timestr = NULL;
conf = this->private;
switch (flags) {
- case IOS_STATS_TYPE_OPEN:
+ case IOS_STATS_TYPE_OPEN:
list_head = &conf->list[IOS_STATS_TYPE_OPEN];
LOCK (&conf->lock);
{
ret = dict_set_uint64 (resp, "current-open",
conf->cumulative.nr_opens);
if (ret)
- goto out;
+ goto unlock;
ret = dict_set_uint64 (resp, "max-open",
conf->cumulative.max_nr_opens);
- tm = localtime (&conf->cumulative.max_openfd_time.tv_sec);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
-
- ret = dict_set_str (resp, "max-openfd-time",
- timestr);
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ if (conf->cumulative.max_openfd_time.tv_sec)
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+
+ dict_timestr = gf_strdup (timestr);
+ if (!dict_timestr)
+ goto unlock;
+ ret = dict_set_dynstr (resp, "max-openfd-time",
+ dict_timestr);
if (ret)
- goto out;
+ goto unlock;
}
+ unlock:
UNLOCK (&conf->lock);
-
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
+ goto out;
break;
case IOS_STATS_TYPE_READ:
list_head = &conf->list[IOS_STATS_TYPE_READ];
@@ -1125,7 +1147,7 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
break;
case IOS_STATS_TYPE_READ_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- index = IOS_STATS_THRU_READ;
+ index = IOS_STATS_THRU_READ;
break;
case IOS_STATS_TYPE_WRITE_THROUGHPUT:
list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
@@ -1146,39 +1168,44 @@ io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
snprintf (key, 256, "%s-%d", "filename", cnt);
ret = dict_set_str (resp, key, entry->iosstat->filename);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "value",cnt);
ret = dict_set_uint64 (resp, key, entry->value);
if (ret)
- goto out;
+ goto unlock_list_head;
if (index != IOS_STATS_THRU_MAX) {
snprintf (key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_sec);
if (ret)
- goto out;
+ goto unlock_list_head;
snprintf (key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32 (resp, key,
+ ret = dict_set_int32 (resp, key,
entry->iosstat->thru_counters[index].time.tv_usec);
if (ret)
- goto out;
+ goto unlock_list_head;
}
if (cnt == list_cnt)
break;
}
}
+unlock_list_head:
UNLOCK (&list_head->lock);
-
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
ret = dict_set_int32 (resp, "members", cnt);
out:
return ret;
}
+
int
io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
@@ -1231,21 +1258,21 @@ io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, CREATE);
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_fd *iosfd = NULL;
char *path = NULL;
struct ios_stat *iosstat = NULL;
struct ios_conf *conf = NULL;
- conf = this->private;
+ conf = this->private;
path = frame->local;
frame->local = NULL;
@@ -1269,6 +1296,16 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_fd_ctx_set (fd, this, iosfd);
ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
LOCK (&conf->lock);
{
@@ -1286,7 +1323,7 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unwind:
UPDATE_PROFILE_STATS (frame, OPEN);
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1294,10 +1331,10 @@ unwind:
int
io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STAT);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1306,7 +1343,7 @@ int
io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct iatt *buf, struct iobref *iobref)
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
fd_t *fd = NULL;
@@ -1330,7 +1367,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref);
+ vector, count, buf, iobref, xdata);
return 0;
}
@@ -1339,7 +1376,7 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = NULL;
@@ -1351,13 +1388,13 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ios_inode_ctx_get (inode, this, &iosstat);
if (iosstat) {
BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
- BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
inode = NULL;
iosstat = NULL;
}
}
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1367,7 +1404,7 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
inode_t *inode = frame->local;
@@ -1383,17 +1420,17 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
iosstat = NULL;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READDIR);
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
@@ -1401,10 +1438,10 @@ io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNC);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
@@ -1412,10 +1449,10 @@ io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETATTR);
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
@@ -1423,11 +1460,11 @@ io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, UNLINK);
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1437,12 +1474,12 @@ int
io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RENAME);
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent,
- prenewparent, postnewparent);
+ prenewparent, postnewparent, xdata);
return 0;
}
@@ -1450,10 +1487,10 @@ io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf)
+ struct iatt *sbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, READLINK);
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
@@ -1462,10 +1499,10 @@ int
io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
UPDATE_PROFILE_STATS (frame, LOOKUP);
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr,
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
}
@@ -1475,11 +1512,11 @@ int
io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SYMLINK);
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1488,11 +1525,11 @@ int
io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, MKNOD);
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1501,7 +1538,8 @@ int
io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
char *path = frame->local;
@@ -1519,8 +1557,11 @@ io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1529,28 +1570,28 @@ int
io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LINK);
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int
io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FLUSH);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
struct ios_stat *iosstat = NULL;
int ret = -1;
@@ -1566,7 +1607,7 @@ io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
unwind:
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -1574,13 +1615,13 @@ unwind:
int
io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, RMDIR);
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1588,71 +1629,100 @@ io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, TRUNCATE);
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, STATFS);
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, SETXATTR);
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, GETXATTR);
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSYNCDIR);
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ACCESS);
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -1660,92 +1730,126 @@ io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FTRUNCATE);
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
int
io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FSTAT);
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, LK);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
int
io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, ENTRYLK);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, XATTROP);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FXATTROP);
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, INODELK);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
int
io_stats_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -1753,128 +1857,122 @@ io_stats_inodelk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
int
io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
UPDATE_PROFILE_STATS (frame, FINODELK);
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
-io_stats_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
int
-io_stats_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
int
-io_stats_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
int
io_stats_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
int
-io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
int
io_stats_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
-
+ loc, size, xdata);
return 0;
}
int
-io_stats_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
int
io_stats_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -1883,117 +1981,112 @@ io_stats_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
int
io_stats_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
int
io_stats_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
-
+ loc, flags, xdata);
return 0;
}
int
-io_stats_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
-
+ linkpath, loc, umask, xdata);
return 0;
}
int
io_stats_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
int
io_stats_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
-
+ loc, stbuf, valid, xdata);
return 0;
}
int
io_stats_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
-
+ loc, offset, xdata);
return 0;
}
int
-io_stats_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int32_t wbflags)
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -2002,7 +2095,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
@@ -2010,7 +2103,7 @@ io_stats_open (call_frame_t *frame, xlator_t *this,
int
io_stats_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
frame->local = gf_strdup (loc->path);
@@ -2019,14 +2112,14 @@ io_stats_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
int
io_stats_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
frame->local = fd;
@@ -2035,7 +2128,7 @@ io_stats_readv (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -2044,7 +2137,7 @@ int
io_stats_writev (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iovec *vector,
int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int len = 0;
@@ -2052,14 +2145,13 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
frame->local = fd->inode;
len = iov_length (vector, count);
-
BUMP_WRITE (fd, len);
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -2067,47 +2159,47 @@ io_stats_writev (call_frame_t *frame, xlator_t *this,
int
io_stats_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
int
io_stats_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
int
io_stats_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags)
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-void
+int
conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
{
struct {
@@ -2131,27 +2223,27 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
if (!strncmp (filename, "", 1)) {
gf_log (this->name, GF_LOG_ERROR, "No filename given");
- return;
+ return -1;
}
logfp = fopen (filename, "w+");
- GF_ASSERT (logfp);
if (!logfp) {
gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
"for writing", filename);
- return;
- }
+ return -1;
+ }
(void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
logfp);
- io_stats_dump (this, &args);
+ io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
fclose (logfp);
}
+ return 0;
}
int
io_stats_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
struct {
xlator_t *this;
@@ -2170,43 +2262,85 @@ io_stats_setxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int
io_stats_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int
io_stats_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
int
io_stats_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
@@ -2214,13 +2348,13 @@ io_stats_opendir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
frame->local = fd->inode;
START_FOP_LATENCY (frame);
@@ -2228,108 +2362,144 @@ io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
-
+ fd, size, offset, dict);
return 0;
}
int
io_stats_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
-
+ fd, size, offset, xdata);
return 0;
}
int
io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
int
io_stats_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
int
io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
-
+ fd, offset, xdata);
return 0;
}
int
io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
int
io_stats_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+}
+
+
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
return 0;
}
int
io_stats_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
@@ -2346,7 +2516,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
LOCK (&conf->lock);
{
- conf->cumulative.nr_opens--;
+ conf->cumulative.nr_opens--;
}
UNLOCK (&conf->lock);
@@ -2354,8 +2524,7 @@ io_stats_release (xlator_t *this, fd_t *fd)
if (iosfd) {
io_stats_dump_fd (this, iosfd);
- if (iosfd->filename)
- GF_FREE (iosfd->filename);
+ GF_FREE (iosfd->filename);
GF_FREE (iosfd);
}
@@ -2380,6 +2549,116 @@ io_stats_forget (xlator_t *this, inode_t *inode)
return 0;
}
+static int
+ios_init_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+
+ GF_ASSERT (conf);
+
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->thru_list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
+ }
+
+ return 0;
+}
+
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
+{
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
+
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ UNLOCK (&conf->lock);
+
+ return;
+}
+
+static int
+io_stats_clear (struct ios_conf *conf)
+{
+ struct timeval now;
+ int ret = -1;
+
+ GF_ASSERT (conf);
+
+ if (!gettimeofday (&now, NULL))
+ {
+ LOCK (&conf->lock);
+ {
+ ios_global_stats_clear (&conf->cumulative, &now);
+ ios_global_stats_clear (&conf->incremental, &now);
+ conf->increment = 0;
+ }
+ UNLOCK (&conf->lock);
+ ret = 0;
+ }
+
+ return ret;
+}
int
reconfigure (xlator_t *this, dict_t *options)
@@ -2447,7 +2726,6 @@ int
init (xlator_t *this)
{
struct ios_conf *conf = NULL;
- int i = 0;
char *sys_log_str = NULL;
int sys_log_level = -1;
char *log_str = NULL;
@@ -2484,35 +2762,9 @@ init (xlator_t *this)
gettimeofday (&conf->cumulative.started_at, NULL);
gettimeofday (&conf->incremental.started_at, NULL);
- for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
- conf->list[i].iosstats = GF_CALLOC (1,
- sizeof(*conf->list[i].iosstats),
- gf_io_stats_mt_ios_stat);
-
- if (!conf->list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
-
- INIT_LIST_HEAD(&conf->list[i].iosstats->list);
- LOCK_INIT (&conf->list[i].lock);
- }
-
- for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
- conf->thru_list[i].iosstats = GF_CALLOC (1,
- sizeof (*conf->thru_list[i].iosstats),
- gf_io_stats_mt_ios_stat);
-
- if (!conf->thru_list[i].iosstats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
-
- INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
- LOCK_INIT (&conf->thru_list[i].lock);
- }
+ ret = ios_init_top_stats (conf);
+ if (ret)
+ return -1;
GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
@@ -2554,6 +2806,8 @@ fini (xlator_t *this)
return;
this->private = NULL;
+ ios_destroy_top_stats (conf);
+
GF_FREE(conf);
gf_log (this->name, GF_LOG_INFO,
@@ -2561,7 +2815,6 @@ fini (xlator_t *this)
return;
}
-
int
notify (xlator_t *this, int32_t event, void *data, ...)
{
@@ -2569,10 +2822,11 @@ notify (xlator_t *this, int32_t event, void *data, ...)
struct ios_dump_args args = {0};
dict_t *output = NULL;
dict_t *dict = NULL;
- int32_t top_op = 0;
+ int32_t op = 0;
int32_t list_cnt = 0;
double throughput = 0;
double time = 0;
+ gf_boolean_t is_peek = _gf_false;
va_list ap;
dict = data;
@@ -2581,15 +2835,37 @@ notify (xlator_t *this, int32_t event, void *data, ...)
va_end (ap);
switch (event) {
case GF_EVENT_TRANSLATOR_INFO:
- ret = dict_get_int32 (dict, "top-op", &top_op);
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "top-op", &op);
if (!ret) {
ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
- if (top_op > IOS_STATS_TYPE_NONE &&
- top_op < IOS_STATS_TYPE_MAX)
+ if (op > IOS_STATS_TYPE_NONE &&
+ op < IOS_STATS_TYPE_MAX)
ret = io_stats_dump_stats_to_dict (this, output,
- top_op, list_cnt);
- if (top_op == IOS_STATS_TYPE_READ_THROUGHPUT ||
- top_op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
+ op, list_cnt);
+ if (op == IOS_STATS_TYPE_READ_THROUGHPUT ||
+ op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
ret = dict_get_double (dict, "throughput",
&throughput);
if (!ret) {
@@ -2610,9 +2886,41 @@ notify (xlator_t *this, int32_t event, void *data, ...)
}
} else {
- (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_DICT,
- output);
- ret = io_stats_dump (this, &args);
+ ret = dict_get_int32 (dict, "info-op", &op);
+ if (ret || op < GF_CLI_INFO_ALL ||
+ GF_CLI_INFO_CLEAR < op)
+ op = GF_CLI_INFO_ALL;
+
+ ret = dict_set_int32 (output, "info-op", op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set info-op in dict");
+ goto out;
+ }
+
+ if (GF_CLI_INFO_CLEAR == op) {
+ ret = io_stats_clear (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to clear info stats");
+
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ }
+ else {
+ ret = dict_get_str_boolean (dict, "peek",
+ _gf_false);
+ if (-1 != ret)
+ is_peek = ret;
+
+ (void) ios_dump_args_init (&args,
+ IOS_DUMP_TYPE_DICT, output);
+ ret = io_stats_dump (this, &args, op, is_peek);
+ }
}
break;
default:
@@ -2644,6 +2952,9 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
.readdirp = io_stats_readdirp,
@@ -2661,6 +2972,9 @@ struct xlator_fops fops = {
.fxattrop = io_stats_fxattrop,
.setattr = io_stats_setattr,
.fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
@@ -2704,7 +3018,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "CRITICAL",
.description = "Gluster's syslog log-level",
- .value = { "WARNING", "ERROR", "CRITICAL"}
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
},
{ .key = {"brick-log-level"},
.type = GF_OPTION_TYPE_STR,
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a04..7b2597b4d 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module -avoid-version
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 000000000..9fa7d97c2
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 88e413c1d..c9d839356 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
@@ -29,1203 +18,1388 @@
* Very helpful translator for debugging.
*/
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-
-
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
-
-int trace_log_level = GF_LOG_INFO;
-
-static char *
-trace_stat_to_str (struct iatt *buf)
+int
+dump_history_trace (circular_buffer_t *cb, void *data)
{
- char *statstr = NULL;
- char atime_buf[256] = {0,};
- char mtime_buf[256] = {0,};
- char ctime_buf[256] = {0,};
- int asprint_ret_value = 0;
- uint64_t ia_time = 0;
-
- if (!buf) {
- statstr = NULL;
- goto out;
- }
-
- ia_time = buf->ia_atime;
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
-
- ia_time = buf->ia_mtime;
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
- ia_time = buf->ia_ctime;
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime ((time_t *)&ia_time));
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
- asprint_ret_value = gf_asprintf (&statstr,
- "gfid=%s ino=%"PRIu64", mode=%o, "
- "nlink=%"GF_PRI_NLINK", uid=%u, "
- "gid=%u, size=%"PRIu64", "
- "blocks=%"PRIu64", atime=%s, "
- "mtime=%s, ctime=%s",
- uuid_utoa (buf->ia_gfid), buf->ia_ino,
- st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- buf->ia_nlink, buf->ia_uid,
- buf->ia_gid, buf->ia_size,
- buf->ia_blocks, atime_buf,
- mtime_buf, ctime_buf);
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
- if (asprint_ret_value < 0)
- statstr = NULL;
+ gf_proc_dump_write ("FOP", "%s\n", string);
-out:
- return statstr;
+ return 0;
}
-
int
trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
- "*stbuf {%s}, *preparent {%s}, *postparent = "
- "{%s})", frame->root->unique,
- uuid_utoa (inode->gfid), op_ret, fd,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (preparentstr)
- GF_FREE (preparentstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
/* for 'release' log */
fd_ctx_set (fd, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, *fd=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
/* for 'release' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+out:
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
return 0;
}
-
int
trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
-
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
-
int
trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
return 0;
}
-
int
trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost)
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- statpre, statpost);
+out:
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
return 0;
}
-
int
trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (frame->local), op_ret, preparentstr,
- postparentstr);
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preoldparentstr = NULL;
- char *postoldparentstr = NULL;
- char *prenewparentstr = NULL;
- char *postnewparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preoldparentstr = trace_stat_to_str (preoldparent);
- postoldparentstr = trace_stat_to_str (postoldparent);
-
- prenewparentstr = trace_stat_to_str (prenewparent);
- postnewparentstr = trace_stat_to_str (postnewparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- "*preoldparent = {%s}, *postoldparent = {%s}"
- " *prenewparent = {%s}, *postnewparent = {%s})",
- frame->root->unique, op_ret, statstr,
- preoldparentstr, postoldparentstr,
- prenewparentstr, postnewparentstr);
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preoldparentstr)
- GF_FREE (preoldparentstr);
+ conf = this->private;
- if (postoldparentstr)
- GF_FREE (postoldparentstr);
-
- if (prenewparentstr)
- GF_FREE (prenewparentstr);
-
- if (postnewparentstr)
- GF_FREE (postnewparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preoldparent, preoldparentstr);
+ trace_stat_to_str (postoldparent, postoldparentstr);
+ trace_stat_to_str (prenewparent, prenewparentstr);
+ trace_stat_to_str (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-
int
trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *stbuf)
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
if (op_ret == 0) {
- statstr = trace_stat_to_str (stbuf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s, "
- "stbuf = { %s })",
- frame->root->unique, op_ret, op_errno, buf,
- statstr);
- } else
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ trace_stat_to_str (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
- if (statstr)
- GF_FREE (statstr);
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, stbuf);
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
return 0;
}
-
int
trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xdata, struct iatt *postparent)
{
- char *statstr = NULL;
- char *postparentstr = NULL;
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*buf {%s}, *postparent {%s}",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
/* For 'forget' */
inode_ctx_put (inode, this, 0);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xattr, postparent);
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
-
int
trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (postparentstr)
- GF_FREE (postparentstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (preparentstr)
- GF_FREE (preparentstr);
+ conf = this->private;
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- " *prebuf = {%s}, *postbuf = {%s})",
- frame->root->unique, op_ret,
- statstr, preparentstr, postparentstr);
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (statstr)
- GF_FREE (statstr);
+ conf = this->private;
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, fd=%p",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno, fd);
- }
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+ LOG_ELEMENT (conf, string);
+ }
+out:
/* for 'releasedir' log */
if (op_ret >= 0)
fd_ctx_set (fd, this, 0);
- frame->local = NULL;
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-
int
trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, preparentstr, postparentstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent);
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-
int
trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, preopstr,
- postopstr);
+ conf = this->private;
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
-
int
trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- char *prebufstr = NULL;
- char *postbufstr = NULL;
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- prebufstr = trace_stat_to_str (prebuf);
- postbufstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret,
- prebufstr, postbufstr);
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
- if (prebufstr)
- GF_FREE (prebufstr);
-
- if (postbufstr)
- GF_FREE (postbufstr);
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, prebufstr);
+ trace_stat_to_str (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf);
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-
int
trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- char *statstr = NULL;
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
-
- if (statstr)
- GF_FREE (statstr);
+ char string[4096] = {0.};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
+ LOG_ELEMENT (conf, string);
}
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-
int
trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
}
- }
- frame->local = NULL;
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
-
-
int
trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-
int
trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
return 0;
}
int
trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum)
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
}
- frame->local = NULL;
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum);
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
return 0;
}
@@ -1235,35 +1409,54 @@ trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
trace_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-
int
trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1306,32 +1499,43 @@ trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-
int
trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *cmd_str = NULL, *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
@@ -1374,233 +1578,351 @@ trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
break;
}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd =%p "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (fd->inode->gfid), volume, fd,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-
int
trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (path=%s flags=%d)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ loc, flags, dict, xdata);
return 0;
}
-
int
trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
+ fd, flags, dict, xdata);
return 0;
}
-
int
trace_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
/* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
-
int
-trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, size=%"GF_PRI_SIZET")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, size);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
-
int
trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev, dict_t *params)
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d dev=%"GF_PRI_DEV")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode, dev);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, params);
+ loc, mode, dev, umask, xdata);
return 0;
}
-
int
trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
+ loc, mode, umask, xdata);
return 0;
}
-
int
-trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+
frame->local = loc->inode->gfid;
- }
+ LOG_ELEMENT (conf, string);
+ }
+out:
STACK_WIND (frame, trace_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-
int
-trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
+ loc, flags, xdata);
return 0;
}
-
int
trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s linkpath=%s, path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- linkpath, loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
return 0;
}
-
int
-trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1608,29 +1930,40 @@ trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path, newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
-trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
if (newloc->inode)
uuid_utoa_r (newloc->inode->gfid, newgfid);
else
@@ -1638,42 +1971,63 @@ trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
uuid_utoa_r (oldloc->inode->gfid, oldgfid);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path,
- newgfid, newloc->path);
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
+ oldloc, newloc, xdata);
return 0;
}
-
int
trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%o)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1685,44 +2039,64 @@ trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
}
frame->local = loc->inode->gfid;
}
+out:
STACK_WIND (frame, trace_setattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
return 0;
}
-
int
trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ia_time = 0;
- char actime_str[256] = {0,};
- char modtime_str[256] = {0,};
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, mode=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, stbuf->ia_uid, stbuf->ia_gid);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
@@ -1734,353 +2108,552 @@ trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
localtime ((time_t *)&ia_time));
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, actime_str, modtime_str);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
}
frame->local = fd->inode->gfid;
}
+out:
STACK_WIND (frame, trace_fsetattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
return 0;
}
-
int
trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, offset=%"PRId64"",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
}
-
int
trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, int32_t wbflags)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d fd=%p wbflags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags, fd, wbflags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
-
int
trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, fd=%p, flags=0%o mode=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd, flags, mode);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
-
int
trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-
int
trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, count, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
-
int
-trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, (loc->inode)?
- uuid_utoa (loc->inode->gfid):"0", loc->path);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
+ loc, xdata);
return 0;
}
-
int
-trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
-
int
-trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s flags=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), flags, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd, flags);
+ fd, flags, xdata);
return 0;
}
-
int
trace_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
-
int
trace_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
trace_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
-
int
-trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s fd=%p",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset);
+ fd, size, offset, dict);
return 0;
}
-
int
trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, offset);
+ fd, size, offset, xdata);
return 0;
}
-
int
trace_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s datasync=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- datasync, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
-
int
-trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mask);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc, mask);
+ loc, mask, xdata);
return 0;
}
-
int32_t
trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
+
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" len=%u fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, len, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_rchecksum_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len);
+ fd, offset, len, xdata);
return 0;
@@ -2089,162 +2662,264 @@ trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t
trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd=%p basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- volume, fd, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fentrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
return 0;
}
int32_t
trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p name=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, name);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fgetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, name, xdata);
return 0;
}
int32_t
trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fsetxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, dict, flags, xdata);
return 0;
}
int
trace_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
}
-
int
-trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
-
int
trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock)
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
}
+out:
STACK_WIND (frame, trace_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock);
+ fd, cmd, lock, xdata);
return 0;
}
int32_t
trace_forget (xlator_t *this, inode_t *inode)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
/* If user want to understand when a lookup happens,
he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s", uuid_utoa (inode->gfid));
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
int32_t
trace_releasedir (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+out:
return 0;
}
int32_t
trace_release (xlator_t *this, fd_t *fd)
{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
if (trace_fop_names[GF_FOP_OPEN].enabled ||
trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
}
+
+out:
return 0;
}
-
void
enable_all_calls (int enabled)
{
@@ -2254,7 +2929,6 @@ enable_all_calls (int enabled)
trace_fop_names[i].enabled = enabled;
}
-
void
enable_call (const char *name, int enabled)
{
@@ -2282,6 +2956,105 @@ process_call_list (const char *list, int include)
}
}
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option his off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
int32_t
init (xlator_t *this)
@@ -2289,6 +3062,10 @@ init (xlator_t *this)
dict_t *options = NULL;
char *includes = NULL, *excludes = NULL;
char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
if (!this)
return -1;
@@ -2303,6 +3080,12 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
options = this->options;
includes = data_to_str (dict_get (options, "include-ops"));
@@ -2311,8 +3094,13 @@ init (xlator_t *this)
{
int i;
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
trace_fop_names[i].enabled = 1;
}
}
@@ -2320,14 +3108,43 @@ init (xlator_t *this)
if (includes && excludes) {
gf_log (this->name,
GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
return -1;
}
+
if (includes)
process_call_list (includes, 1);
if (excludes)
process_call_list (excludes, 0);
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
if (dict_get (options, "force-log-level")) {
forced_loglevel = data_to_str (dict_get (options,
"force-log-level"));
@@ -2335,25 +3152,34 @@ init (xlator_t *this)
goto setloglevel;
if (strcmp (forced_loglevel, "INFO") == 0)
- trace_log_level = GF_LOG_INFO;
+ conf->trace_log_level = GF_LOG_INFO;
else if (strcmp (forced_loglevel, "TRACE") == 0)
- trace_log_level = GF_LOG_TRACE;
+ conf->trace_log_level = GF_LOG_TRACE;
else if (strcmp (forced_loglevel, "ERROR") == 0)
- trace_log_level = GF_LOG_ERROR;
+ conf->trace_log_level = GF_LOG_ERROR;
else if (strcmp (forced_loglevel, "DEBUG") == 0)
- trace_log_level = GF_LOG_DEBUG;
+ conf->trace_log_level = GF_LOG_DEBUG;
else if (strcmp (forced_loglevel, "WARNING") == 0)
- trace_log_level = GF_LOG_WARNING;
+ conf->trace_log_level = GF_LOG_WARNING;
else if (strcmp (forced_loglevel, "CRITICAL") == 0)
- trace_log_level = GF_LOG_CRITICAL;
+ conf->trace_log_level = GF_LOG_CRITICAL;
else if (strcmp (forced_loglevel, "NONE") == 0)
- trace_log_level = GF_LOG_NONE;
+ conf->trace_log_level = GF_LOG_NONE;
}
setloglevel:
- gf_log_set_loglevel (trace_log_level);
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
- return 0;
+ return ret;
}
void
@@ -2362,6 +3188,9 @@ fini (xlator_t *this)
if (!this)
return;
+ if (this->history)
+ eh_destroy (this->history);
+
gf_log (this->name, GF_LOG_INFO,
"trace translator unloaded");
return;
@@ -2410,7 +3239,6 @@ struct xlator_fops fops = {
.fsetattr = trace_fsetattr,
};
-
struct xlator_cbks cbks = {
.release = trace_release,
.releasedir = trace_releasedir,
@@ -2426,5 +3254,21 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR
/*.value = { ""} */
},
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
{ .key = {NULL} },
};
+
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 000000000..045eefb36
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
+
+#define trace_stat_to_str(buf, statstr) \
+ do { \
+ char atime_buf[256] = {0,}; \
+ char mtime_buf[256] = {0,}; \
+ char ctime_buf[256] = {0,}; \
+ uint64_t ia_time = 0; \
+ \
+ if (!buf) \
+ break; \
+ \
+ ia_time = buf->ia_atime; \
+ strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_mtime; \
+ strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_ctime; \
+ strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ snprintf (statstr, sizeof (statstr), \
+ "gfid=%s ino=%"PRIu64", mode=%o, " \
+ "nlink=%"GF_PRI_NLINK", uid=%u, " \
+ "gid=%u, size=%"PRIu64", " \
+ "blocks=%"PRIu64", atime=%s, " \
+ "mtime=%s, ctime=%s", \
+ uuid_utoa (buf->ia_gfid), buf->ia_ino, \
+ st_mode_from_ia (buf->ia_prot, \
+ buf->ia_type), \
+ buf->ia_nlink, buf->ia_uid, \
+ buf->ia_gid, buf->ia_size, \
+ buf->ia_blocks, atime_buf, \
+ mtime_buf, ctime_buf); \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680f..36efc6698 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/cluster/unify/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 000000000..b13f65043
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module -avoid-version -lssl -lcrypto
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif \ No newline at end of file
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 000000000..1ec41495c
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,962 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher A cipher algorithm, which requires some chunks of
+ * algorithm text to be padded at left and(or) right sides before
+ * cipher transaform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm Atomic cipher algorithm, which requires the last
+ * with EOF issue incomplete cblock in a file to be padded with some
+ * data (usually zeros).
+ *
+ *
+ * operation, which reading/writing from offset, which is not aligned to
+ * forms a gap at to atom size
+ * the beginning
+ *
+ *
+ * operation, which reading/writing count bytes starting from offset off,
+ * forms a gap at so that off+count is not aligned to atom_size
+ * the end
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Сomment. Head block has at least one gap (either at
+ * the beginning, or at the end)
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Сomment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+static off_t offset_at_head(struct avec_config *conf)
+{
+ return conf->aligned_offset;
+}
+
+static off_t offset_at_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_hole_conf(frame));
+}
+
+static off_t offset_at_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_data_conf(frame));
+}
+
+
+static off_t offset_at_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0) +
+ (conf->nr_full_blocks << get_atom_bits(object));
+}
+
+static off_t offset_at_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_hole_conf(frame), object);
+}
+
+
+static off_t offset_at_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_data_conf(frame), object);
+}
+
+static off_t offset_at_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0);
+}
+
+static off_t offset_at_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_data_conf(frame), object);
+}
+
+static off_t offset_at_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ uint32_t gap_at_beg;
+ uint32_t gap_at_end;
+
+ check_head_block(conf);
+
+ gap_at_beg = conf->off_in_head;
+
+ if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0 )
+ gap_at_end = 0;
+ else
+ gap_at_end = get_atom_size(object) - conf->off_in_tail;
+
+ return get_atom_size(object) - (gap_at_beg + gap_at_end);
+}
+
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_tail_block(conf);
+ return conf->off_in_tail;
+}
+
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_full_block(conf);
+ return get_atom_size(object);
+}
+
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_hole_conf(frame), object);
+}
+
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+ check_cursor_head(conf);
+
+ return conf->off_in_head;
+}
+
+static uint32_t offset_in_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return 0;
+}
+
+static uint32_t offset_in_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_cursor_full(conf);
+
+ if (has_head_block(conf))
+ return (conf->cursor - 1) << get_atom_bits(object);
+ else
+ return conf->cursor << get_atom_bits(object);
+}
+
+static uint32_t offset_in_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_data_conf(frame));
+}
+
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_hole_conf(frame));
+}
+
+static uint32_t offset_in_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_data_conf(frame), object);
+}
+
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Uptodate gaps of the @partial block with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ struct rmw_atom *atom)
+{
+ size_t was_read = 0;
+ uint64_t file_size;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *partial = atom->get_iovec(frame, 0);
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+ gf_boolean_t check_last_cblock = _gf_false;
+#endif
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto exit;
+
+ file_size = local->cur_file_size;
+ was_read = op_ret;
+
+ if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+ /*
+ * head atom with a non-uptodate gap
+ * at the beginning
+ *
+ * fill the gap with plain text of the
+ * latest version. Convert a part of hole
+ * (if any) to zeros.
+ */
+ int32_t i;
+ int32_t copied = 0;
+ int32_t to_gap; /* amount of data needed to uptodate
+ the gap at the beginning */
+#if 0
+ int32_t hole = 0; /* The part of the hole which
+ * got in the head block */
+#endif /* 0 */
+ to_gap = conf->off_in_head;
+
+ if (was_read < to_gap) {
+ if (file_size >
+ offset_at_head(conf) + was_read) {
+ /*
+ * It is impossible to uptodate
+ * head block: too few bytes have
+ * been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the beginning");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+#if 0
+ hole = to_gap - was_read;
+#endif /* 0 */
+ to_gap = was_read;
+ }
+ /*
+ * uptodate the gap at the beginning
+ */
+ for (i = 0; i < count && copied < to_gap; i++) {
+ int32_t to_copy;
+
+ to_copy = vec[i].iov_len;
+ if (to_copy > to_gap - copied)
+ to_copy = to_gap - copied;
+
+ memcpy(partial->iov_base, vec[i].iov_base, to_copy);
+ copied += to_copy;
+ }
+#if 0
+ /*
+ * If possible, convert part of the
+ * hole, which got in the head block
+ */
+ ret = TRY_LOCK(&local->hole_lock);
+ if (!ret) {
+ if (local->hole_handled)
+ /*
+ * already converted by
+ * crypt_writev_cbk()
+ */
+ UNLOCK(&local->hole_lock);
+ else {
+ /*
+ * convert the part of the hole
+ * which got in the head block
+ * to zeros.
+ *
+ * Update the orig_offset to make
+ * sure writev_cbk() won't care
+ * about this part of the hole.
+ *
+ */
+ memset(partial->iov_base + to_gap, 0, hole);
+
+ conf->orig_offset -= hole;
+ conf->orig_size += hole;
+ UNLOCK(&local->hole_lock);
+ }
+ }
+ else /*
+ * conversion is being performed
+ * by crypt_writev_cbk()
+ */
+ ;
+#endif /* 0 */
+ }
+ if (atom->locality == TAIL_ATOM ||
+ (!has_tail_block(conf) && conf->off_in_tail)) {
+ /*
+ * tail atom, or head atom with a non-uptodate
+ * gap at the end.
+ *
+ * fill the gap at the end of the block
+ * with plain text of the latest version.
+ * Pad the result, (if needed)
+ */
+ int32_t i;
+ int32_t to_gap;
+ int copied;
+ off_t off_in_tail;
+ int32_t to_copy;
+
+ off_in_tail = conf->off_in_tail;
+ to_gap = conf->gap_in_tail;
+
+ if (to_gap && was_read < off_in_tail + to_gap) {
+ /*
+ * It is impossible to uptodate
+ * the gap at the end: too few bytes
+ * have been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the end");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * uptodate the gap at the end
+ */
+ copied = 0;
+ to_copy = to_gap;
+ for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+ uint32_t from_vec, off_in_vec;
+
+ off_in_vec = 0;
+ from_vec = vec[i].iov_len;
+ if (from_vec > to_copy) {
+ off_in_vec = from_vec - to_copy;
+ from_vec = to_copy;
+ }
+ memcpy(partial->iov_base +
+ off_in_tail + to_gap - copied - from_vec,
+ vec[i].iov_base + off_in_vec,
+ from_vec);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+ (int)from_vec,
+ (int)off_in_tail + to_gap - copied - from_vec,
+ (int)off_in_vec);
+
+ copied += from_vec;
+ to_copy -= from_vec;
+ }
+ partial->iov_len = off_in_tail + to_gap;
+
+ if (object_alg_should_pad(object)) {
+ int32_t resid = 0;
+ resid = partial->iov_len & (object_alg_blksize(object) - 1);
+ if (resid) {
+ /*
+ * append a new EOF padding
+ */
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "set padding size %d",
+ local->eof_padding_size);
+
+ memset(partial->iov_base + partial->iov_len,
+ 1,
+ local->eof_padding_size);
+ partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "pad cblock with %d zeros:",
+ local->eof_padding_size);
+ dump_cblock(this,
+ (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ check_last_cblock = _gf_true;
+#endif
+ }
+ }
+ }
+ /*
+ * encrypt the whole block
+ */
+ encrypt_aligned_iov(object,
+ partial,
+ 1,
+ atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+ if (check_last_cblock == _gf_true) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "encrypt last cblock with offset %llu",
+ (unsigned long long)atom->offset_at(frame, object));
+ dump_cblock(this, (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ }
+#endif
+ set_local_io_params_writev(frame, object, atom,
+ atom->offset_at(frame, object),
+ iovec_get_size(partial, 1));
+ /*
+ * write the whole block to disk
+ */
+ end_writeback_partial_block = dispatch_end_writeback(local->fop);
+ conf->cursor ++;
+ STACK_WIND(frame,
+ end_writeback_partial_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ partial,
+ 1,
+ atom->offset_at(frame, object),
+ local->flags,
+ local->iobref_data,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "submit partial block: %d bytes from %d offset",
+ (int)iovec_get_size(partial, 1),
+ (int)atom->offset_at(frame, object));
+ exit:
+ return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ */
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype)
+{
+ int32_t ret;
+ dict_t *dict;
+ struct rmw_atom *atom;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ atom = atom_by_types(local->active_setup, ltype);
+ /*
+ * To perform the "read" component of the read-modify-write
+ * sequence the crypt translator does stack_wind to itself.
+ *
+ * Pass current file size to crypt_readv()
+ */
+ dict = dict_new();
+ if (!dict) {
+ /*
+ * FIXME: Handle the error
+ */
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ return;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ /*
+ * FIXME: Handle the error
+ */
+ dict_unref(dict);
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ atom->rmw,
+ this,
+ this->fops->readv, /* crypt_readv */
+ fd,
+ atom->count_to_uptodate(frame, object), /* count */
+ atom->offset_at(frame, object), /* offset to read from */
+ 0,
+ dict);
+ exit:
+ dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into accout head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ conf->cursor += blocks_to_write;
+
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+static int32_t rmw_data_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_data_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, TAIL_ATOM));
+}
+
+static int32_t rmw_hole_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_hole_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, TAIL_ATOM));
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ if (conf->acount == 1 && conf->off_in_tail)
+ return get_atom_size(object);
+ else
+ /* there is no need to read the whole head block */
+ return conf->off_in_head;
+}
+
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ /* we need to read the whole tail block */
+ return get_atom_size(object);
+}
+
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_hole_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_hole_conf(frame), object);
+}
+
+/* atom->get_config() */
+
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->data_conf;
+}
+
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec;
+}
+
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0);
+}
+
+static inline struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->blocks_in_pool - 1);
+}
+
+static inline struct iovec *get_iovec_data_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec;
+}
+
+static inline struct iovec *get_iovec_data_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0) + count;
+}
+
+static inline struct iovec *get_iovec_data_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec +
+ (conf->off_in_head ? 1 : 0) +
+ conf->nr_full_blocks;
+}
+
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+ [DATA_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_data_head,
+ .offset_at = offset_at_data_head,
+ .offset_in = offset_in_data_head,
+ .get_iovec = get_iovec_data_head,
+ .io_size_nopad = io_size_nopad_data_head,
+ .count_to_uptodate = count_to_uptodate_data_head,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_data_tail,
+ .offset_at = offset_at_data_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_data_tail,
+ .io_size_nopad = io_size_nopad_data_tail,
+ .count_to_uptodate = count_to_uptodate_data_tail,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_data_full,
+ .offset_in = offset_in_data_full,
+ .get_iovec = get_iovec_data_full,
+ .io_size_nopad = io_size_nopad_data_full,
+ .get_config = get_config_data
+ },
+ [HOLE_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_hole_head,
+ .offset_at = offset_at_hole_head,
+ .offset_in = offset_in_hole_head,
+ .get_iovec = get_iovec_hole_head,
+ .io_size_nopad = io_size_nopad_hole_head,
+ .count_to_uptodate = count_to_uptodate_hole_head,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_hole_tail,
+ .offset_at = offset_at_hole_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_hole_tail,
+ .io_size_nopad = io_size_nopad_hole_tail,
+ .count_to_uptodate = count_to_uptodate_hole_tail,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_hole_full,
+ .offset_in = offset_in_hole_full,
+ .get_iovec = get_iovec_hole_full,
+ .io_size_nopad = io_size_nopad_hole_full,
+ .get_config = get_config_hole
+ }
+};
+
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality)
+{
+ return &atoms[data][locality];
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 000000000..7c212ad5d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+#define noop do {; } while (0)
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+ return msgflags_check_mtd_rlock(flags) ||
+ msgflags_check_mtd_wlock(flags);
+}
+
+/*
+ * returns number of logical blocks occupied
+ * (maybe partially) by @count bytes
+ * at offset @start.
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+ int blkbits)
+{
+ return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
+}
+
+/*
+ * are two bytes (represented by offsets @off1
+ * and @off2 respectively) in the same logical
+ * block.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+ int blkbits)
+{
+ return off1 >> blkbits == off2 >> blkbits;
+}
+
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+ gf_log(this->name, GF_LOG_DEBUG,
+ "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+ (buf)[0],
+ (buf)[1],
+ (buf)[2],
+ (buf)[3],
+ (buf)[4],
+ (buf)[5],
+ (buf)[6],
+ (buf)[7],
+ (buf)[8],
+ (buf)[9],
+ (buf)[10],
+ (buf)[11],
+ (buf)[12],
+ (buf)[13],
+ (buf)[14],
+ (buf)[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 000000000..2eab921fc
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+ gf_crypt_mt_end,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 000000000..1abdad31d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4522 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop)
+{
+ crypt_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ return NULL;
+ }
+ local->fop = fop;
+ LOCK_INIT(&local->hole_lock);
+ LOCK_INIT(&local->call_lock);
+ LOCK_INIT(&local->rw_count_lock);
+
+ frame->local = local;
+ return local;
+}
+
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+ int ret;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not get inode info");
+ return NULL;
+ }
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not obtain inode info");
+ return NULL;
+ }
+ return info;
+}
+
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+ xlator_t *this)
+{
+ if (local->info)
+ return local->info;
+ local->info = get_crypt_inode_info(local->fd->inode, this);
+ return local->info;
+}
+
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+ loc_t *loc)
+{
+ struct crypt_inode_info *info;
+
+ info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_WARNING,
+ "Can not allocate inode info");
+ return NULL;
+ }
+ memset(info, 0, sizeof(*info));
+#if DEBUG_CRYPT
+ info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!info->loc) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+ GF_FREE(info);
+ return NULL;
+ }
+ if (loc_copy(info->loc, loc)){
+ GF_FREE(info->loc);
+ GF_FREE(info);
+ return NULL;
+ }
+#endif /* DEBUG_CRYPT */
+
+ local->info = info;
+ return info;
+}
+
+static void free_inode_info(struct crypt_inode_info *info)
+{
+#if DEBUG_CRYPT
+ loc_wipe(info->loc);
+ GF_FREE(info->loc);
+#endif
+ memset(info, 0, sizeof(*info));
+ GF_FREE(info);
+}
+
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ if (!inode_ctx_del (inode, this, &ctx_addr))
+ free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
+ return 0;
+}
+
+#if DEBUG_CRYPT
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+ struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = get_object_cinfo(local->info);
+ struct avec_config *conf = &local->data_conf;
+ uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+ if (read <= 0)
+ return;
+ if (read != iovec_get_size(vec, count))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "op_ret differs from amount of read bytes");
+
+ if (object_alg_should_pad(object) && (read & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (!= 0 mod(cblock size))");
+
+ if (conf->aligned_offset + read >
+ stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (too large))");
+
+}
+
+#define PT_BYTES_TO_DUMP (32)
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+ int32_t to_dump;
+ char str[PT_BYTES_TO_DUMP + 1];
+
+ if (!avec)
+ return;
+ to_dump = avec->iov_len;
+ if (to_dump > PT_BYTES_TO_DUMP)
+ to_dump = PT_BYTES_TO_DUMP;
+ memcpy(str, avec->iov_base, to_dump);
+ memset(str + to_dump, '0', 1);
+ gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+ return conf->acount ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ conf->nr_full_blocks;
+}
+
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+ return conf->blocks_in_pool ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ !!has_full_blocks(conf);
+}
+
+static void crypt_check_conf(struct avec_config *conf)
+{
+ int32_t ret = 0;
+ const char *msg;
+
+ switch (conf->type) {
+ case DATA_ATOM:
+ msg = "data";
+ ret = data_conf_invariant(conf);
+ break;
+ case HOLE_ATOM:
+ msg = "hole";
+ ret = hole_conf_invariant(conf);
+ break;
+ default:
+ msg = "unknown";
+ }
+ if (!ret)
+ gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
+}
+
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ uint64_t local_file_size;
+
+ switch(local->fop) {
+ case GF_FOP_FTRUNCATE:
+ return;
+ case GF_FOP_WRITE:
+ local_file_size = local->new_file_size;
+ break;
+ case GF_FOP_READ:
+ if (parent_is_crypt_xlator(frame, this))
+ return;
+ local_file_size = local->cur_file_size;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+ return;
+ }
+ if (buf->ia_size != round_up(local_file_size,
+ object_alg_blksize(object)))
+ gf_log("crypt", GF_LOG_DEBUG,
+ "bad ia_size in buf (%llu), should be %llu",
+ (unsigned long long)buf->ia_size,
+ (unsigned long long)round_up(local_file_size,
+ object_alg_blksize(object)));
+}
+
+#else
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Compound a temporal vector @avec with block-aligned
+ * components, decrypt and fix it up to represent a chunk
+ * of data corresponding to the original size and offset.
+ * Pass the result to the next translator.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *avec;
+ uint32_t i;
+ uint32_t to_vec;
+ uint32_t to_user;
+
+ check_buf(frame, this, stbuf);
+ check_read(frame, this, op_ret, vec, count, stbuf);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->iobref = iobref_ref(iobref);
+
+ local->buf = *stbuf;
+ local->buf.ia_size = local->cur_file_size;
+
+ if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+ goto put_one_call;
+
+ if (conf->orig_offset >= local->cur_file_size) {
+ local->op_ret = 0;
+ goto put_one_call;
+ }
+ /*
+ * correct config params with real file size
+ * and actual amount of bytes read
+ */
+ set_config_offsets(frame, this,
+ conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+ if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+ conf->orig_size = local->cur_file_size - conf->orig_offset;
+ /*
+ * calculate amount of data to be returned
+ * to user.
+ */
+ to_user = op_ret;
+ if (conf->aligned_offset + to_user <= conf->orig_offset) {
+ gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ to_user -= (conf->aligned_offset - conf->orig_offset);
+
+ if (to_user > conf->orig_size)
+ to_user = conf->orig_size;
+ local->rw_count = to_user;
+
+ op_errno = set_config_avec_data(this, local,
+ conf, object, vec, count);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto put_one_call;
+ }
+ avec = conf->avec;
+#if DEBUG_CRYPT
+ if (conf->off_in_tail != 0 &&
+ conf->off_in_tail < object_alg_blksize(object) &&
+ object_alg_should_pad(object))
+ gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+ conf->off_in_tail);
+ if (iovec_get_size(vec, count) != 0 &&
+ in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+ local->cur_file_size - 1,
+ object_alg_blkbits(object))) {
+ gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+ dump_cblock(this,
+ (unsigned char *)(avec[conf->acount - 1].iov_base) +
+ avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+ dump_cblock(this,
+ (unsigned char *)(vec[count - 1].iov_base) +
+ vec[count - 1].iov_len - object_alg_blksize(object));
+ }
+#endif
+ decrypt_aligned_iov(object, avec,
+ conf->acount, conf->aligned_offset);
+ /*
+ * pass proper plain data to user
+ */
+ avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+ avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+ to_vec = to_user;
+ for (i = 0; i < conf->acount; i++) {
+ if (avec[i].iov_len > to_vec)
+ avec[i].iov_len = to_vec;
+ to_vec -= avec[i].iov_len;
+ }
+ put_one_call:
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t crypt_readv(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+ (int)size, (long long)offset);
+ if (parent_is_crypt_xlator(frame, this))
+ gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_READ);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ if (size == 0)
+ goto trivial;
+
+ local->fd = fd_ref(fd);
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ set_config_offsets(frame, this, offset, size,
+ DATA_ATOM, 0);
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * We are called by crypt_writev (or cypt_ftruncate)
+ * to perform the "read" component of the read-modify-write
+ * (or read-prune-write) sequence for some atom;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ *
+ * Retrieve current file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ flags,
+ NULL);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_readv_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ readv_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ ret,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom,
+ off_t io_offset,
+ uint32_t io_size)
+{
+ crypt_local_t *local = frame->local;
+
+ local->io_offset = io_offset;
+ local->io_size = io_size;
+
+ local->io_offset_nopad =
+ atom->offset_at(frame, object) + atom->offset_in(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad offset to %llu",
+ (unsigned long long)local->io_offset_nopad);
+
+ local->io_size_nopad = atom->io_size_nopad(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad size to %llu",
+ (unsigned long long)local->io_size_nopad);
+
+ local->update_disk_file_size = 0;
+ /*
+ * NOTE: eof_padding_size is 0 for all full atoms;
+ * For head and tail atoms it will be set up at rmw_partial block()
+ */
+ local->new_file_size = local->cur_file_size;
+
+ if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
+
+ local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set new file size to %llu",
+ (unsigned long long)local->new_file_size);
+
+ local->update_disk_file_size = 1;
+ }
+}
+
+void set_local_io_params_ftruncate(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ uint32_t resid;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+ if (resid) {
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+ local->new_file_size = conf->aligned_offset;
+ local->update_disk_file_size = 0;
+ /*
+ * file size will be updated
+ * in the ->writev() stack,
+ * when submitting file tail
+ */
+ }
+ else {
+ local->eof_padding_size = 0;
+ local->new_file_size = conf->orig_offset;
+ local->update_disk_file_size = 1;
+ /*
+ * file size will be updated
+ * in this ->ftruncate stack
+ */
+ }
+}
+
+static inline void submit_head(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, HEAD_ATOM);
+}
+
+static inline void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, TAIL_ATOM);
+}
+
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+ /*
+ * hole conversion always means
+ * appended write and goes in ordered fashion
+ */
+ do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+ if (is_ordered_mode(frame)) {
+ do_ordered_submit(frame, this, DATA_ATOM);
+ return;
+ }
+ gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+ get_nr_calls(frame, nr_calls_data(frame));
+ do_parallel_submit(frame, this, DATA_ATOM);
+ return;
+}
+
+/*
+ * heplers called by writev_cbk, fruncate_cbk in ordered mode
+ */
+
+static inline int32_t should_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ return conf->avec != NULL;
+}
+
+static inline int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
+ /*
+ * Don't submit a part of hole, which
+ * fits into a data block:
+ * this part of hole will be converted
+ * as a gap filled by zeros in data head
+ * block.
+ */
+ return conf->cursor < conf->acount - 1;
+ else
+ return conf->cursor < conf->acount;
+}
+
+static inline int32_t should_resume_submit_data(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ if (is_ordered_mode(frame))
+ return conf->cursor < conf->acount;
+ /*
+ * parallel writes
+ */
+ return 0;
+}
+
+static inline int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+ return local->data_conf.avec != NULL;
+}
+
+static void update_local_file_params(call_frame_t *frame,
+ xlator_t *this,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ crypt_local_t *local = frame->local;
+
+ check_buf(frame, this, postbuf);
+
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->new_file_size;
+
+ local->cur_file_size = local->new_file_size;
+}
+
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Convert hole of size @size at offset @off to
+ * zeros and prepare respective iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Pre-conditions:
+ * @local->file_size is set and valid.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t off, off_t size)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+ ret = set_config_avec_hole(this, local,
+ &local->hole_conf, object, local->fop);
+ crypt_check_conf(&local->hole_conf);
+
+ return ret;
+}
+
+/*
+ * prepare for submit @count bytes at offset @from
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+ off_t from, int32_t size, struct iovec *vec,
+ int32_t vec_count, int32_t setup_gap)
+{
+ uint32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, from, size,
+ DATA_ATOM, setup_gap);
+
+ ret = set_config_avec_data(this, local,
+ &local->data_conf, object, vec, vec_count);
+ crypt_check_conf(&local->data_conf);
+
+ return ret;
+}
+
+static void free_avec(struct iovec *avec,
+ char **pool, int blocks_in_pool)
+{
+ if (!avec)
+ return;
+ GF_FREE(pool);
+ GF_FREE(avec);
+}
+
+static void free_avec_data(crypt_local_t *local)
+{
+ return free_avec(local->data_conf.avec,
+ local->data_conf.pool,
+ local->data_conf.blocks_in_pool);
+}
+
+static void free_avec_hole(crypt_local_t *local)
+{
+ return free_avec(local->hole_conf.avec,
+ local->hole_conf.pool,
+ local->hole_conf.blocks_in_pool);
+}
+
+
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (has_head_block(conf))
+ submit_head(frame, this);
+
+ if (has_full_blocks(conf))
+ submit_full(frame, this);
+
+ if (has_tail_block(conf))
+ submit_tail(frame, this);
+ return;
+}
+
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (should_submit_head_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_head(frame, this);
+ }
+ else if (should_submit_full_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_full(frame, this);
+ }
+ else if (should_submit_tail_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_tail(frame, this);
+ }
+ else
+ gf_log("crypt", GF_LOG_DEBUG,
+ "nothing has been submitted in ordered mode");
+ return;
+}
+
+static int32_t do_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+ if (local->cur_file_size < local->data_conf.orig_offset) {
+ /*
+ * Set up hole config
+ */
+ op_errno = prepare_for_submit_hole(frame,
+ this,
+ local->cur_file_size,
+ local->data_conf.orig_offset - local->cur_file_size);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto error;
+ }
+ }
+ if (should_submit_hole(local))
+ submit_hole(frame, this);
+ else
+ submit_data(frame, this);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t writev_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+int crypt_writev(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vec,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+ gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+ (int)iovec_get_size(vec, count), (long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ if (iobref)
+ local->iobref = iobref_ref(iobref);
+ /*
+ * to update real file size on the server
+ */
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (iovec_get_size(vec, count) == 0)
+ goto trivial;
+
+ ret = prepare_for_submit_data(frame, this, offset,
+ iovec_get_size(vec, count),
+ vec, count, 0 /* don't setup gup
+ in tail: we don't
+ know file size yet */);
+ if (ret)
+ goto error;
+
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * we are called by shinking crypt_ftruncate(),
+ * which doesn't perform hole conversion;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ */
+
+ /*
+ * extract file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ submit_data(frame, this);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ /*
+ * lock the file and retrieve its size
+ */
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_writev_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ writev_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->iobref)
+ iobref_unref(iobref);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ set_config_offsets(frame, this,
+ offset,
+ 0, /* count */
+ DATA_ATOM,
+ 0 /* since we prune, there is no
+ gap in tail to uptodate */);
+ return 0;
+}
+
+/*
+ * Finish the read-prune-modify sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * submuit the rest of the file
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ dict_t *dict;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+ local->new_file_size = conf->orig_offset;
+
+ /*
+ * The rest of the file is a partial block and, hence,
+ * should be written via RMW sequence, so the crypt xlator
+ * does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_writev()
+ */
+ op_errno = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (op_errno) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ dict_unref(dict);
+ goto error;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "passing current file size (%llu) to crypt_writev",
+ (unsigned long long)local->cur_file_size);
+ /*
+ * Padding will be filled with
+ * zeros by rmw_partial_block()
+ */
+ STACK_WIND(frame,
+ prune_complete,
+ this,
+ this->fops->writev, /* crypt_writev */
+ local->fd,
+ &local->vec,
+ 1,
+ conf->aligned_offset, /* offset to write from */
+ 0,
+ local->iobref,
+ dict);
+
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked in behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we should't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ */
+static int32_t prune_write(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t i;
+ size_t to_copy;
+ size_t copied = 0;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret == -1)
+ goto put_one_call;
+
+ /*
+ * At first, uptodate head block
+ */
+ if (iovec_get_size(vec, count) < conf->off_in_head) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to uptodate head block for prune");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ local->vec.iov_len = conf->off_in_head;
+ local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+ gf_crypt_mt_data);
+
+ if (local->vec.iov_base == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to calloc head block for prune");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto put_one_call;
+ }
+ for (i = 0; i < count; i++) {
+ to_copy = vec[i].iov_len;
+ if (to_copy > local->vec.iov_len - copied)
+ to_copy = local->vec.iov_len - copied;
+
+ memcpy((char *)local->vec.iov_base + copied,
+ vec[i].iov_base,
+ to_copy);
+ copied += to_copy;
+ if (copied == local->vec.iov_len)
+ break;
+ }
+ /*
+ * perform prune with aligned offset
+ * (i.e. at this step we prune a bit
+ * more then it is needed
+ */
+ STACK_WIND(frame,
+ prune_submit_file_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->aligned_offset,
+ local->xdata);
+ return 0;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * File prune is more complicated than expand.
+ * First we need to read the latest atom to not lose info
+ * needed for proper update. Also we need to make sure that
+ * every component of read-prune-write sequence leaves data
+ * consistent
+ *
+ * Non-cblock aligned prune is performed as read-prune-write
+ * sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune
+ * 3) issue a write request for the end-of-file
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ int32_t ret;
+
+ ret = prepare_for_prune(frame, this, offset);
+ if (ret)
+ return ret;
+ return read_prune_write(frame, this);
+}
+
+int32_t expand_file(call_frame_t *frame, xlator_t *this,
+ uint64_t offset)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+
+ ret = prepare_for_submit_hole(frame, this,
+ local->old_file_size,
+ offset - local->old_file_size);
+ if (ret)
+ return ret;
+ submit_hole(frame, this);
+ return 0;
+}
+
+static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_ftruncate,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . recieve file size;
+ * . expand or prune file.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+/* ->flush_cbk() */
+int32_t truncate_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local->xdata);
+ return 0;
+}
+
+/* ftruncate_cbk() */
+int32_t truncate_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *fd = local->fd;
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ STACK_WIND(frame,
+ truncate_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd,
+ NULL);
+ fd_unref(fd);
+ return 0;
+}
+
+/*
+ * is called as ->open_cbk()
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0) {
+ fd_unref(fd);
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno, NULL, NULL, NULL);
+ return 0;
+ }
+ /*
+ * crypt_truncate() is implemented via crypt_ftruncate(),
+ * so the crypt xlator does STACK_WIND to itself here
+ */
+ STACK_WIND(frame,
+ truncate_flush,
+ this,
+ this->fops->ftruncate, /* crypt_ftruncate */
+ fd,
+ local->offset,
+ NULL);
+ return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->offset = offset;
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ return end_writeback_writev;
+ case GF_FOP_FTRUNCATE:
+ return end_writeback_ftruncate;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+ return NULL;
+ }
+}
+
+/*
+ * true, if the caller needs metadata string
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+ data_t *data;
+ uint32_t flags;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, MSGFLAGS_PREFIX);
+ if (!data)
+ return 0;
+ if (data->len != sizeof(uint32_t)) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Bad msgflags size (%d)", data->len);
+ return -1;
+ }
+ flags = *((uint32_t *)data->data);
+ return msgflags_check_mtd_lock(&flags);
+}
+
+static int32_t crypt_open_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret < 0)
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ put_one_call_open(frame);
+ return 0;
+}
+
+static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_open_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+}
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ load_mtd_open,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ CRYPTO_FORMAT_PREFIX,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * verify metadata against the specified pathname
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc) {
+ ret = ENOMEM;
+ goto error;
+ }
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ ret = is_custom_mtd(xdata);
+ if (ret < 0) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ ret = EINVAL;
+ goto error;
+ }
+ local->custom_mtd = ret;
+
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ /*
+ * we can't open O_WRONLY, because
+ * we need to do read-modify-write
+ */
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ get_one_call_nolock(frame);
+ STACK_WIND(frame,
+ crypt_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc,
+ flags,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(open,
+ frame,
+ -1,
+ ret,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct object_cipher_info *object = &info->cinfo;
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+ uuid_utoa(info->oid));
+#endif
+ ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+ master);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Init inode info at ->create() time
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ data_t *data)
+{
+ struct object_cipher_info *object;
+
+ info->nr_minor = CRYPT_XLATOR_ID;
+ memcpy(info->oid, data->data, data->len);
+
+ object = &info->cinfo;
+
+ object->o_alg = master->m_alg;
+ object->o_mode = master->m_mode;
+ object->o_block_bits = master->m_block_bits;
+ object->o_dkey_size = master->m_dkey_size;
+}
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+{
+ memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
+}
+
+static int32_t crypt_create_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_private_t *priv = this->private;
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0) {
+ free_inode_info(info);
+ goto unwind;
+ }
+ op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (op_errno) {
+ op_ret = -1;
+ free_inode_info(info);
+ goto unwind;
+ }
+ /*
+ * FIXME: drop major subversion number
+ */
+ op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
+ if (op_ret == -1) {
+ op_errno = EIO;
+ free_inode_info(info);
+ goto unwind;
+ }
+ unwind:
+ free_format(local);
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+
+ STACK_WIND(frame,
+ crypt_create_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_create(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ mode_t mode,
+ mode_t umask,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int ret;
+ data_t *data;
+ crypt_local_t *local;
+ crypt_private_t *priv;
+ struct master_cipher_info *master;
+ struct crypt_inode_info *info;
+
+ priv = this->private;
+ master = get_master_cinfo(priv);
+
+ if (master_alg_atomic(master)) {
+ /*
+ * We can't open O_WRONLY, because we
+ * need to do read-modify-write.
+ */
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ }
+ local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ data = dict_get(xdata, "gfid-req");
+ if (!data) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+ goto error;
+ }
+ if (data->len != sizeof(uuid_t)) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad gfid size (%d), should be %d",
+ (int)data->len, (int)sizeof(uuid_t));
+ goto error;
+ }
+ info = alloc_inode_info(local, loc);
+ if (!info){
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * NOTE:
+ * format has to be created BEFORE
+ * proceeding to the untrusted server
+ */
+ ret = alloc_format_create(local);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ init_inode_info_create(info, master, data);
+
+ ret = create_format(local->format,
+ loc,
+ info,
+ master);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_format_size());
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame,
+ crypt_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc,
+ flags,
+ mode,
+ umask,
+ fd,
+ xdata);
+ return 0;
+ error:
+ gf_log("crypt", GF_LOG_WARNING, "can not create file");
+ STACK_UNWIND_STRICT(create,
+ frame,
+ -1,
+ ret,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * FIXME: this should depends on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+ char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
+static int32_t crypt_fsetxattr(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t linkop_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0 &&
+ op_errno == ENOENT &&
+ local->loc->inode->ia_type == IA_IFLNK) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * unpin inode on the server
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+void link_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+}
+
+void link_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ link_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * unlink()
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+void unlink_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+}
+
+void unlink_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ unlink_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ local->loc,
+ local->flags,
+ local->xdata);
+}
+
+void rename_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t rename_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ rename_unwind(frame);
+ return 0;
+}
+
+static int32_t rename_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ local->buf = *buf;
+ local->prebuf = *preoldparent;
+ local->postbuf = *postoldparent;
+ if (prenewparent) {
+ local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+ gf_crypt_mt_iatt);
+ if (!local->prenewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->prenewparent = *prenewparent;
+ }
+ if (postnewparent) {
+ local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+ gf_crypt_mt_iatt);
+ if (!local->postnewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->postnewparent = *postnewparent;
+ }
+ STACK_WIND(frame,
+ rename_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ rename_unwind(frame);
+ return 0;
+}
+
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+static int32_t __do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_wind_handler_t wind_fn;
+ linkop_unwind_handler_t unwind_fn;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret >= 0)
+ wind_fn(frame, this);
+ else {
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ unwind_fn(frame);
+ }
+ return 0;
+}
+
+static int32_t do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if(op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ gf_boolean_t upload_info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+ struct crypt_inode_info *info;
+ data_t *old_mtd;
+ uint32_t new_mtd_size;
+ uint64_t value = 0;
+ void (*unwind_fn)(call_frame_t *frame);
+ void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+ mtd_op_t mop;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ mop = linkop_mtdop_dispatch(local->fop);
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto wind;
+ if (op_ret < 0)
+ /*
+ * verification failed
+ */
+ goto error;
+
+ old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+ if (!old_mtd) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Metadata string wasn't found");
+ goto error;
+ }
+ new_mtd_size = format_size(mop, old_mtd->len);
+ op_errno = alloc_format(local, new_mtd_size);
+ if (op_errno)
+ goto error;
+ /*
+ * check for cached info
+ */
+ op_ret = inode_ctx_get(fd->inode, this, &value);
+ if (op_ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode info was not found");
+ op_errno = EINVAL;
+ goto error;
+ }
+ /*
+ * info was found in the cache
+ */
+ local->info = info;
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info wasn't found in the cache;
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info)
+ goto error;
+ init_inode_info_head(info, fd);
+ local->info = info;
+ upload_info = _gf_true;
+ }
+ op_errno = open_format((unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (op_errno)
+ goto error;
+ if (upload_info == _gf_true) {
+ op_errno = init_inode_info_tail(info,
+ get_master_cinfo(priv));
+ if (op_errno)
+ goto error;
+ op_errno = inode_ctx_put(fd->inode, this,
+ (uint64_t)(long)(info));
+ if (op_errno == -1) {
+ op_errno = EIO;
+ goto error;
+ }
+ }
+ /*
+ * update the format string (append/update/cup a MAC)
+ */
+ op_errno = update_format(local->format,
+ (unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->mac_idx,
+ mop,
+ local->newloc,
+ info,
+ get_master_cinfo(priv),
+ local);
+ if (op_errno)
+ goto error;
+ /*
+ * store the new format string on the server
+ */
+ if (new_mtd_size) {
+ op_errno = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_mtd_size);
+ if (op_errno)
+ goto error;
+ }
+ STACK_WIND(frame,
+ do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ local->loc,
+ local->xattr,
+ 0,
+ NULL);
+ return 0;
+ wind:
+ wind_fn(frame, this);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t linkop_grab_local(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags, dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret = ENOMEM;
+ fd_t *fd;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, op);
+ if (!local)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ fd = fd_create(oldloc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->flags = flags;
+ local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, oldloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ local->loc = NULL;
+ goto error;
+ }
+ if (newloc) {
+ local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+ if (!local->newloc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ goto error;
+ }
+ memset(local->newloc, 0, sizeof(*local->newloc));
+ ret = loc_copy(local->newloc, newloc);
+ if (ret) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ GF_FREE(local->newloc);
+ goto error;
+ }
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ return 0;
+
+error:
+ if (local) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ if (local->fd)
+ fd_unref(local->fd);
+ local->fd = 0;
+ local->loc = NULL;
+ local->newloc = NULL;
+ local->op_ret = -1;
+ local->op_errno = ret;
+ }
+
+ return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ */
+static int32_t linkop(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags,
+ dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret;
+ dict_t *dict;
+ crypt_local_t *local;
+ void (*unwind_fn)(call_frame_t *frame);
+
+ unwind_fn = linkop_unwind_dispatch(op);
+
+ ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+ local = frame->local;
+ if (ret)
+ goto error;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * Set a message to crypt_open() that we need
+ * locked metadata string.
+ * All link operations (link, unlink, rename)
+ * need write lock
+ */
+ msgflags_set_mtd_wlock(&local->msgflags);
+ ret = dict_set_static_bin(dict,
+ MSGFLAGS_PREFIX,
+ &local->msgflags,
+ sizeof(local->msgflags));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+ dict_unref(dict);
+ goto error;
+ }
+ /*
+ * verify metadata against the old pathname
+ * and retrieve locked metadata string
+ */
+ STACK_WIND(frame,
+ linkop_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ oldloc,
+ O_RDWR,
+ local->fd,
+ dict);
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = ret;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+}
+
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+}
+
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+}
+
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_readv_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+}
+
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local))
+ crypt_readv_done(frame, this);
+}
+
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate amout of butes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ */
+static int32_t load_file_size(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0)
+ goto unwind;
+ /*
+ * load regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+ local->buf.ia_size = data_to_uint64(data);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "FOP %d: Translate regular file to %llu",
+ local->fop,
+ (unsigned long long)local->buf.ia_size);
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? local->inode : NULL,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata,
+ op_ret >= 0 ? &local->postbuf : NULL);
+ break;
+ case GF_FOP_READ:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ 0,
+ op_ret >= 0 ? &local->buf : NULL,
+ NULL,
+ NULL);
+ break;
+ default:
+ gf_log(this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (local_inode)
+ inode_unref(local_inode);
+ return 0;
+}
+
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_stat(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t crypt_lookup(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+ STACK_WIND(frame,
+ crypt_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * for every regular directory entry find its real file size
+ * and update stat's buf properly
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ data_t *data;
+
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+ data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
+ if (!data){
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size of direntry not found");
+ op_errno = EIO;
+ op_ret = -1;
+ break;
+ }
+ entry->d_stat.ia_size = data_to_uint64(data);
+ }
+ unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto error;
+ }
+ else
+ dict_ref(xdata);
+ /*
+ * make sure that we'll have real file sizes at ->readdirp_cbk()
+ */
+ ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(xdata);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd,
+ size,
+ offset,
+ xdata);
+ dict_unref(xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ uint64_t block_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("block-size", block_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("block-size", block_size, size, error);
+
+ switch (block_size) {
+ case 512:
+ master->m_block_bits = 9;
+ break;
+ case 1024:
+ master->m_block_bits = 10;
+ break;
+ case 2048:
+ master->m_block_bits = 11;
+ break;
+ case 4096:
+ master->m_block_bits = 12;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: unsupported block size %llu",
+ (unsigned long long)block_size);
+ goto error;
+ }
+ return 0;
+ error:
+ return -1;
+}
+
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_alg = AES_CIPHER_ALG;
+ return 0;
+}
+
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_mode = XTS_CIPHER_MODE;
+ return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, size, error);
+
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+static int is_hex(char *s) {
+ return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
+}
+
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+ int hex_size)
+{
+ int i;
+ int hex_byte = 0;
+
+ for (i = 0; i < (hex_size / 2); i++) {
+ if (!is_hex(src + i*2) || !is_hex(src + i*2 + 1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: not hex symbol in key");
+ return -1;
+ }
+ if (sscanf(src + i*2, "%2x", &hex_byte) != 1) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: can not parse hex key");
+ return -1;
+ }
+ dst[i] = hex_byte & 0xff;
+ }
+ return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ int32_t ret;
+ FILE *file = NULL;
+
+ int32_t key_size;
+ char *opt_key_file_pathname = NULL;
+
+ unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+ char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ /*
+ * extract master key passed via option
+ */
+ GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+ if (!opt_key_file_pathname) {
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+ return -1;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+ opt_key_file_pathname);
+
+ file = fopen(opt_key_file_pathname, "r");
+ if (file == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: can not open file with master key");
+ return -1;
+ }
+ /*
+ * extract hex key
+ */
+ key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+ if (key_size < sizeof(hex_buf)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: master key is too short");
+ goto bad_key;
+ }
+ ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
+ if (ret)
+ goto bad_key;
+ memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+ memset(hex_buf, 0, sizeof(hex_buf));
+ fclose(file);
+
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return 0;
+ bad_key:
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+ if (file)
+ fclose(file);
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ return get_nmtd_vol_key(get_master_cinfo(priv));
+}
+
+int32_t crypt_init_xlator(xlator_t *this)
+{
+ int32_t ret;
+ crypt_private_t *priv = this->private;
+
+ ret = master_set_alg(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_mode(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_block_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_data_key_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_master_vol_key(this, priv);
+ if (ret)
+ return ret;
+ return master_set_nmtd_vol_key(this, priv);
+}
+
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+ this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+ if (!this->private) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Can not allocate memory for private data");
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static void crypt_free_private(xlator_t *this)
+{
+ crypt_private_t *priv = this->private;
+ if (priv) {
+ memset(priv, 0, sizeof(*priv));
+ GF_FREE(priv);
+ }
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_crypt_mt_end);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ crypt_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("crypt", this, error);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+ GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+ priv = this->private;
+
+ ret = master_set_block_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure block size");
+ goto error;
+ }
+ ret = master_set_data_key_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure data key size");
+ goto error;
+ }
+ return 0;
+ error:
+ return ret;
+}
+
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+struct volume_options options[] = {
+ { .key = {"master-key"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "Pathname of regular file which contains master volume key"
+ },
+ { .key = {"data-key-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Data key size (bits)",
+ .min = 256,
+ .max = 512,
+ .default_value = "256",
+ },
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Atom size (bits)",
+ .min = 512,
+ .max = 4096,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 000000000..ff8eb571b
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,903 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+#ifdef __NetBSD__
+typedef off_t loff_t;
+#endif
+
+struct crypt_key {
+ uint32_t len;
+ const char *label;
+};
+
+/*
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE
+}crypt_key_type;
+
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits;
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2];
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+* This info is not changed during file's life
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo;
+};
+
+/*
+ * this should locate in secure memory
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+static inline struct master_cipher_info *get_master_cinfo(crypt_private_t *priv)
+{
+ return &priv->master;
+}
+
+static inline struct object_cipher_info *get_object_cinfo(struct crypt_inode_info
+ *info)
+{
+ return &info->cinfo;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head;
+ uint32_t off_in_tail;
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there is no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size od EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or date)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* lenght of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/* This represents a (read)modify-write atom */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of partial block should be uptodated by
+ * reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks goes here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+ int i;
+ size_t size = 0;
+ for (i = 0; i < count; i++)
+ size += vec[i].iov_len;
+ return size;
+}
+
+static inline int32_t crypt_xlator_id(void)
+{
+ return CRYPT_XLATOR_ID;
+}
+
+static inline mtd_loader_id current_mtd_loader(void)
+{
+ return MTD_LOADER_V1;
+}
+
+static inline uint32_t master_key_size (void)
+{
+ return crypt_keys[MASTER_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t nmtd_vol_key_size (void)
+{
+ return crypt_keys[NMTD_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t alg_mode_blkbits(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].blkbits;
+}
+
+static inline uint32_t alg_mode_blksize(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return 1 << alg_mode_blkbits(alg, mode);
+}
+
+static inline gf_boolean_t alg_mode_atomic(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].atomic;
+}
+
+static inline gf_boolean_t alg_mode_should_pad(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].should_pad;
+}
+
+static inline uint32_t master_alg_blksize(struct master_cipher_info *mr)
+{
+ return alg_mode_blksize(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t master_alg_blkbits(struct master_cipher_info *mr)
+{
+ return alg_mode_blkbits(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_atomic(struct master_cipher_info *mr)
+{
+ return alg_mode_atomic(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_should_pad(struct master_cipher_info *mr)
+{
+ return alg_mode_should_pad(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t object_alg_blksize(struct object_cipher_info *ob)
+{
+ return alg_mode_blksize(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t object_alg_blkbits(struct object_cipher_info *ob)
+{
+ return alg_mode_blkbits(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_atomic(struct object_cipher_info *ob)
+{
+ return alg_mode_atomic(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_should_pad(struct object_cipher_info *ob)
+{
+ return alg_mode_should_pad(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t aes_raw_key_size(struct master_cipher_info *master)
+{
+ return master->m_dkey_size >> 3;
+}
+
+static inline struct avec_config *get_hole_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->hole_conf);
+}
+
+static inline struct avec_config *get_data_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->data_conf);
+}
+
+static inline int32_t get_atom_bits (struct object_cipher_info *object)
+{
+ return object->o_block_bits;
+}
+
+static inline int32_t get_atom_size (struct object_cipher_info *object)
+{
+ return 1 << get_atom_bits(object);
+}
+
+static inline int32_t has_head_block(struct avec_config *conf)
+{
+ return conf->off_in_head ||
+ (conf->acount == 1 && conf->off_in_tail);
+}
+
+static inline int32_t has_tail_block(struct avec_config *conf)
+{
+ return conf->off_in_tail && conf->acount > 1;
+}
+
+static inline int32_t has_full_blocks(struct avec_config *conf)
+{
+ return conf->nr_full_blocks;
+}
+
+static inline int32_t should_submit_head_block(struct avec_config *conf)
+{
+ return has_head_block(conf) && (conf->cursor == 0);
+}
+
+static inline int32_t should_submit_tail_block(struct avec_config *conf)
+{
+ return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
+}
+
+static inline int32_t should_submit_full_block(struct avec_config *conf)
+{
+ uint32_t start = has_head_block(conf) ? 1 : 0;
+
+ return has_full_blocks(conf) &&
+ conf->cursor >= start &&
+ conf->cursor < start + conf->nr_full_blocks;
+}
+
+#if DEBUG_CRYPT
+static inline void crypt_check_input_len(size_t len,
+ struct object_cipher_info *object)
+{
+ if (object_alg_should_pad(object) && (len & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
+}
+
+static inline void check_head_block(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
+}
+
+static inline void check_tail_block(struct avec_config *conf)
+{
+ if (!has_tail_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
+}
+
+static inline void check_full_block(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
+}
+
+static inline void check_cursor_head(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of head atom method");
+ else if (conf->cursor != 0)
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
+ conf->cursor);
+}
+
+static inline void check_cursor_full(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of full atom method");
+ if (has_head_block(conf) && (conf->cursor == 0))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor is not at full atom");
+}
+
+/*
+ * FIXME: use avec->iov_len to check setup
+ */
+static inline int data_local_invariant(crypt_local_t *local)
+{
+ return 0;
+}
+
+#else
+#define crypt_check_input_len(len, object) noop
+#define check_head_block(conf) noop
+#define check_tail_block(conf) noop
+#define check_full_block(conf) noop
+#define check_cursor_head(conf) noop
+#define check_cursor_full(conf) noop
+
+#endif /* DEBUG_CRYPT */
+
+static inline struct avec_config *conf_by_type(call_frame_t *frame,
+ atom_data_type dtype)
+{
+ struct avec_config *conf = NULL;
+
+ switch (dtype) {
+ case HOLE_ATOM:
+ conf = get_hole_conf(frame);
+ break;
+ case DATA_ATOM:
+ conf = get_data_conf(frame);
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
+ }
+ return conf;
+}
+
+static inline uint32_t nr_calls_head(struct avec_config *conf)
+{
+ return has_head_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_tail(struct avec_config *conf)
+{
+ return has_tail_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_full(struct avec_config *conf)
+{
+ switch(conf->type) {
+ case HOLE_ATOM:
+ return has_full_blocks(conf);
+ case DATA_ATOM:
+ return has_full_blocks(conf) ?
+ logical_blocks_occupied(0,
+ conf->nr_full_blocks,
+ MAX_IOVEC_BITS) : 0;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
+ return 0;
+ }
+}
+
+static inline uint32_t nr_calls(struct avec_config *conf)
+{
+ return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
+}
+
+static inline uint32_t nr_calls_data(call_frame_t *frame)
+{
+ return nr_calls(get_data_conf(frame));
+}
+
+static inline uint32_t nr_calls_hole(call_frame_t *frame)
+{
+ return nr_calls(get_hole_conf(frame));
+}
+
+static inline void get_one_call_nolock(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ ++local->nr_calls;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
+}
+
+static inline void get_one_call(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_one_call_nolock(frame);
+ UNLOCK(&local->call_lock);
+}
+
+static inline void get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ local->nr_calls += nr;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
+}
+
+static inline void get_nr_calls(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_nr_calls_nolock(frame, nr);
+ UNLOCK(&local->call_lock);
+}
+
+static inline int put_one_call(crypt_local_t *local)
+{
+ uint32_t last = 0;
+
+ LOCK(&local->call_lock);
+ if (--local->nr_calls == 0)
+ last = 1;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
+
+ UNLOCK(&local->call_lock);
+ return last;
+}
+
+static inline int is_appended_write(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->orig_offset + conf->orig_size > local->old_file_size;
+}
+
+static inline int is_ordered_mode(call_frame_t *frame)
+{
+#if 0
+ crypt_local_t *local = frame->local;
+ return local->fop == GF_FOP_FTRUNCATE ||
+ (local->fop == GF_FOP_WRITE && is_appended_write(frame));
+#endif
+ return 1;
+}
+
+static inline int32_t hole_conv_completed(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+ return conf->cursor == conf->acount;
+}
+
+static inline int32_t data_write_in_progress(crypt_local_t *local)
+{
+ return local->active_setup == DATA_ATOM;
+}
+
+static inline int32_t parent_is_crypt_xlator(call_frame_t *frame,
+ xlator_t *this)
+{
+ return frame->parent->this == this;
+}
+
+static inline linkop_wind_handler_t linkop_wind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_wind;
+ case GF_FOP_UNLINK:
+ return unlink_wind;
+ case GF_FOP_RENAME:
+ return rename_wind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline linkop_unwind_handler_t linkop_unwind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_unwind;
+ case GF_FOP_UNLINK:
+ return unlink_unwind;
+ case GF_FOP_RENAME:
+ return rename_unwind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline mtd_op_t linkop_mtdop_dispatch(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_LINK:
+ return MTD_APPEND;
+ case GF_FOP_UNLINK:
+ return MTD_CUT;
+ case GF_FOP_RENAME:
+ return MTD_OVERWRITE;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
+ return MTD_LAST_OP;
+ }
+}
+
+#endif /* __CRYPT_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
new file mode 100644
index 000000000..762fa554a
--- /dev/null
+++ b/xlators/encryption/crypt/src/data.c
@@ -0,0 +1,769 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
+{
+ unsigned char *ivec;
+
+ ivec = object->u.aes_xts.ivec;
+
+ /* convert the tweak into a little-endian byte
+ * array (IEEE P1619/D16, May 2007, section 5.1)
+ */
+
+ *((uint64_t *)ivec) = htole64(offset);
+
+ /* ivec is padded with zeroes */
+}
+
+static int32_t aes_set_keys_common(unsigned char *raw_key, uint32_t key_size,
+ AES_KEY *keys)
+{
+ int32_t ret;
+
+ ret = AES_set_encrypt_key(raw_key,
+ key_size,
+ &keys[AES_ENCRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
+ return ret;
+ }
+ ret = AES_set_decrypt_key(raw_key,
+ key_size,
+ &keys[AES_DECRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * set private cipher info for xts mode
+ */
+static int32_t set_private_aes_xts(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int ret;
+ struct object_cipher_info *object = get_object_cinfo(info);
+ unsigned char *data_key;
+ uint32_t subkey_size;
+
+ /* init tweak value */
+ memset(object->u.aes_xts.ivec, 0, 16);
+
+ data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
+ if (!data_key)
+ return ENOMEM;
+
+ /*
+ * retrieve data keying meterial
+ */
+ ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * parse compound xts key
+ */
+ subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
+ /*
+ * install key for data encryption
+ */
+ ret = aes_set_keys_common(data_key,
+ subkey_size << 3, object->u.aes_xts.dkey);
+ if (ret) {
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * set up key used to encrypt tweaks
+ */
+ ret = AES_set_encrypt_key(data_key + subkey_size,
+ object->o_dkey_size / 2,
+ &object->u.aes_xts.tkey);
+ if (ret < 0)
+ gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
+
+ GF_FREE(data_key);
+ return ret;
+}
+
+static int32_t aes_xts_init(void)
+{
+ cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
+ return 0;
+}
+
+static int32_t check_key_aes_xts(uint32_t keysize)
+{
+ switch(keysize) {
+ case 256:
+ case 512:
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+static int32_t encrypt_aes_xts(const unsigned char *from,
+ unsigned char *to, size_t length,
+ off_t offset, const int enc,
+ struct object_cipher_info *object)
+{
+ XTS128_CONTEXT ctx;
+ if (enc) {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
+ ctx.block1 = (block128_f)AES_encrypt;
+ }
+ else {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
+ ctx.block1 = (block128_f)AES_decrypt;
+ }
+ ctx.key2 = &object->u.aes_xts.tkey;
+ ctx.block2 = (block128_f)AES_encrypt;
+
+ return CRYPTO_xts128_encrypt(&ctx,
+ object->u.aes_xts.ivec,
+ from,
+ to,
+ length, enc);
+}
+
+/*
+ * Cipher input chunk @from of length @len;
+ * @to: result of cipher transform;
+ * @off: offset in a file (must be cblock-aligned);
+ */
+static void cipher_data(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ const int enc)
+{
+ crypt_check_input_len(len, object);
+
+#if TRIVIAL_TFM && DEBUG_CRYPT
+ return;
+#endif
+ data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
+ data_cipher_algs[object->o_alg][object->o_mode].encrypt
+ ((const unsigned char *)from,
+ (unsigned char *)to,
+ len,
+ off,
+ enc,
+ object);
+}
+
+#define MAX_CIPHER_CHUNK (1 << 30)
+
+/*
+ * Do cipher (encryption/decryption) transform of a
+ * continuous region of memory.
+ *
+ * @len: a number of bytes to transform;
+ * @buf: data to transform;
+ * @off: offset in a file, should be block-aligned
+ * for atomic cipher modes and ksize-aligned
+ * for other modes).
+ * @dir: direction of transform (encrypt/decrypt).
+ */
+static void cipher_region(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ int dir)
+{
+ while (len > 0) {
+ size_t to_cipher;
+
+ to_cipher = len;
+ if (to_cipher > MAX_CIPHER_CHUNK)
+ to_cipher = MAX_CIPHER_CHUNK;
+
+ /* this will reset IV */
+ cipher_data(object,
+ from,
+ to,
+ off,
+ to_cipher,
+ dir);
+ from += to_cipher;
+ to += to_cipher;
+ off += to_cipher;
+ len -= to_cipher;
+ }
+}
+
+/*
+ * Do cipher transform (encryption/decryption) of
+ * plaintext/ciphertext represented by @vec.
+ *
+ * Pre-conditions: @vec represents a continuous piece
+ * of data in a file at offset @off to be ciphered
+ * (encrypted/decrypted).
+ * @count is the number of vec's components. All the
+ * components must be block-aligned, the caller is
+ * responsible for this. @dir is "direction" of
+ * transform (encrypt/decrypt).
+ */
+static void cipher_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off,
+ int32_t dir)
+{
+ int i;
+ int len = 0;
+
+ for (i = 0; i < count; i++) {
+ cipher_region(object,
+ vec[i].iov_base,
+ vec[i].iov_base,
+ off + len,
+ vec[i].iov_len,
+ dir);
+ len += vec[i].iov_len;
+ }
+}
+
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 1);
+}
+
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 0);
+}
+
+#if DEBUG_CRYPT
+static void compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
+{
+ int i;
+ int off = 0;
+ for (i = 0; i < count; i++) {
+ memcpy(buf + off,
+ vec[i].iov_base + skip,
+ vec[i].iov_len - skip);
+
+ off += (vec[i].iov_len - skip);
+ skip = 0;
+ }
+}
+
+static void check_iovecs(struct iovec *vec, int cnt,
+ struct iovec *avec, int acnt, uint32_t off_in_head)
+{
+ char *s1, *s2;
+ uint32_t size, asize;
+
+ size = iovec_get_size(vec, cnt);
+ asize = iovec_get_size(avec, acnt) - off_in_head;
+ if (size != asize) {
+ gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
+ size, asize);
+ return;
+ }
+ s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
+ if (!s1) {
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
+ if (!s2) {
+ GF_FREE(s1);
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ compound_stream(vec, cnt, s1, 0);
+ compound_stream(avec, acnt, s2, off_in_head);
+ if (memcmp(s1, s2, size))
+ gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
+ GF_FREE(s1);
+ GF_FREE(s2);
+}
+
+#else
+#define check_iovecs(vec, count, avec, avecn, off) noop
+#endif /* DEBUG_CRYPT */
+
+static char *data_alloc_block(xlator_t *this, crypt_local_t *local,
+ int32_t block_size)
+{
+ struct iobuf *iobuf = NULL;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
+ if (!iobuf) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobuf");
+ return NULL;
+ }
+ if (!local->iobref_data) {
+ local->iobref_data = iobref_new();
+ if (!local->iobref_data) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobref");
+ iobuf_unref(iobuf);
+ return NULL;
+ }
+ }
+ iobref_add(local->iobref_data, iobuf);
+ return iobuf->ptr;
+}
+
+/*
+ * Compound @avec, which represent the same data
+ * chunk as @vec, but has aligned components of
+ * specified block size. Alloc blocks, if needed.
+ * In particular, incomplete head and tail blocks
+ * must be allocated.
+ * Put number of allocated blocks to @num_blocks.
+ *
+ * Example:
+ *
+ * input: data chunk represented by 4 components
+ * [AB],[BC],[CD],[DE];
+ * output: 5 logical blocks (0, 1, 2, 3, 4).
+ *
+ * A B C D E
+ * *-----*+------*-+---*----+--------+-*
+ * | || | | | | | |
+ * *-+-----+*------+-*---+----*--------*-+------*
+ * 0 1 2 3 4
+ *
+ * 0 - incomplete compound (head);
+ * 1, 2 - full compound;
+ * 3 - full non-compound (the case of reuse);
+ * 4 - incomplete non-compound (tail).
+ */
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf)
+{
+ int vecn = 0; /* number of the current component in vec */
+ int avecn = 0; /* number of the current component in avec */
+ off_t vec_off = 0; /* offset in the current vec component,
+ * i.e. the number of bytes have already
+ * been copied */
+ int32_t block_size = get_atom_size(object);
+ size_t to_process; /* number of vec's bytes to copy and(or) re-use */
+ int32_t off_in_head = conf->off_in_head;
+
+ to_process = iovec_get_size(vec, count);
+
+ while (to_process > 0) {
+ if (off_in_head ||
+ vec[vecn].iov_len - vec_off < block_size) {
+ /*
+ * less than block_size:
+ * the case of incomplete (head or tail),
+ * or compound block
+ */
+ size_t copied = 0;
+ /*
+ * populate the pool with a new block
+ */
+ blocks[*blocks_allocated] = data_alloc_block(this,
+ local,
+ block_size);
+ if (!blocks[*blocks_allocated])
+ return -ENOMEM;
+ memset(blocks[*blocks_allocated], 0, off_in_head);
+ /*
+ * fill the block with vec components
+ */
+ do {
+ size_t to_copy;
+
+ to_copy = vec[vecn].iov_len - vec_off;
+ if (to_copy > block_size - off_in_head)
+ to_copy = block_size - off_in_head;
+
+ memcpy(blocks[*blocks_allocated] + off_in_head + copied,
+ vec[vecn].iov_base + vec_off,
+ to_copy);
+
+ copied += to_copy;
+ to_process -= to_copy;
+
+ vec_off += to_copy;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ } while (copied < (block_size - off_in_head) && to_process > 0);
+ /*
+ * update avec
+ */
+ avec[avecn].iov_len = off_in_head + copied;
+ avec[avecn].iov_base = blocks[*blocks_allocated];
+
+ (*blocks_allocated)++;
+ off_in_head = 0;
+ } else {
+ /*
+ * the rest of the current vec component
+ * is not less than block_size, so reuse
+ * the memory buffer of the component.
+ */
+ size_t to_reuse;
+ to_reuse = (to_process > block_size ?
+ block_size :
+ to_process);
+ avec[avecn].iov_len = to_reuse;
+ avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
+
+ vec_off += to_reuse;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ to_process -= to_reuse;
+ }
+ avecn++;
+ }
+ check_iovecs(vec, count, avec, avecn, conf->off_in_head);
+ return 0;
+}
+
+/*
+ * allocate and setup aligned vector for data submission
+ * Pre-condition: @conf is set.
+ */
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count)
+{
+ int32_t ret = ENOMEM;
+ struct iovec *avec;
+ char **pool;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = DATA_ATOM;
+
+ avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ret;
+ pool = GF_CALLOC(conf->acount, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ret;
+ }
+ if (!vec) {
+ /*
+ * degenerated case: no data
+ */
+ pool[0] = data_alloc_block(this, local, get_atom_size(object));
+ if (!pool[0])
+ goto free;
+ blocks_in_pool = 1;
+ avec->iov_base = pool[0];
+ avec->iov_len = conf->off_in_tail;
+ }
+ else {
+ ret = align_iov_by_atoms(this, local, object, vec, vec_count,
+ avec, pool, &blocks_in_pool, conf);
+ if (ret)
+ goto free;
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ret;
+}
+
+/*
+ * allocate and setup aligned vector for hole submission
+ */
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop)
+{
+ uint32_t i, idx;
+ struct iovec *avec;
+ char **pool;
+ uint32_t num_blocks;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = HOLE_ATOM;
+
+ num_blocks = conf->acount -
+ (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ /*
+ * hole goes before data
+ */
+ if (num_blocks == 1 && conf->off_in_tail != 0)
+ /*
+ * we won't submit a hole which fits into
+ * a data atom: this part of hole will be
+ * submitted with data write
+ */
+ return 0;
+ break;
+ case GF_FOP_FTRUNCATE:
+ /*
+ * expanding truncate, hole goes after data,
+ * and will be submited in any case.
+ */
+ break;
+ default:
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad file operation %d", fop);
+ return 0;
+ }
+ avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ENOMEM;
+ pool = GF_CALLOC(num_blocks, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ENOMEM;
+ }
+ for (i = 0; i < num_blocks; i++) {
+ pool[i] = data_alloc_block(this, local, get_atom_size(object));
+ if (pool[i] == NULL)
+ goto free;
+ blocks_in_pool++;
+ }
+ if (has_head_block(conf)) {
+ /* set head block */
+ idx = 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base + conf->off_in_head,
+ 0,
+ get_atom_size(object) - conf->off_in_head);
+ }
+ if (has_tail_block(conf)) {
+ /* set tail block */
+ idx = num_blocks - 1;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base, 0, conf->off_in_tail);
+ }
+ if (has_full_blocks(conf)) {
+ /* set full block */
+ idx = conf->off_in_head ? 1 : 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /*
+ * since we re-use the buffer,
+ * zeroes will be set every time
+ * before encryption, see submit_full()
+ */
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ENOMEM;
+}
+
+/* A helper for setting up config of partial atoms (which
+ * participate in read-modify-write sequence).
+ *
+ * Calculate and setup precise amount of "extra-bytes"
+ * that should be uptodated at the end of partial (not
+ * necessarily tail!) block.
+ *
+ * Pre-condition: local->old_file_size is valid!
+ * @conf contains setup, which is enough for correct calculation
+ * of has_tail_block(), ->get_offset().
+ */
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype)
+{
+ uint32_t to_block;
+ crypt_local_t *local = frame->local;
+ uint64_t old_file_size = local->old_file_size;
+ struct rmw_atom *partial = atom_by_types(dtype,
+ has_tail_block(conf) ?
+ TAIL_ATOM : HEAD_ATOM);
+
+ if (old_file_size <= partial->offset_at(frame, object))
+ to_block = 0;
+ else {
+ to_block = old_file_size - partial->offset_at(frame, object);
+ if (to_block > get_atom_size(object))
+ to_block = get_atom_size(object);
+ }
+ if (to_block > conf->off_in_tail)
+ conf->gap_in_tail = to_block - conf->off_in_tail;
+ else
+ /*
+ * nothing to uptodate
+ */
+ conf->gap_in_tail = 0;
+}
+
+/*
+ * fill struct avec_config with offsets layouts
+ */
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t set_gap)
+{
+ crypt_local_t *local;
+ struct object_cipher_info *object;
+ struct avec_config *conf;
+ uint32_t resid;
+
+ uint32_t atom_size;
+ uint32_t atom_bits;
+
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head = 0;
+ uint32_t off_in_tail = 0;
+ uint32_t nr_full_blocks;
+ int32_t size_full_blocks;
+
+ uint32_t acount; /* number of alifned components to write.
+ * The same as number of occupied logical
+ * blocks (atoms)
+ */
+ local = frame->local;
+ object = &local->info->cinfo;
+ conf = (dtype == DATA_ATOM ?
+ get_data_conf(frame) : get_hole_conf(frame));
+
+ orig_offset = offset;
+ orig_size = count;
+
+ atom_size = get_atom_size(object);
+ atom_bits = get_atom_bits(object);
+
+ /*
+ * Round-down the start,
+ * round-up the end.
+ */
+ resid = offset & (uint64_t)(atom_size - 1);
+
+ if (resid)
+ off_in_head = resid;
+ aligned_offset = offset - off_in_head;
+ expanded_size = orig_size + off_in_head;
+
+ /* calculate tail,
+ expand size forward */
+ resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
+
+ if (resid) {
+ off_in_tail = resid;
+ expanded_size += (atom_size - off_in_tail);
+ }
+ /*
+ * calculate number of occupied blocks
+ */
+ acount = expanded_size >> atom_bits;
+ /*
+ * calculate number of full blocks
+ */
+ size_full_blocks = expanded_size;
+ if (off_in_head)
+ size_full_blocks -= atom_size;
+ if (off_in_tail && size_full_blocks > 0)
+ size_full_blocks -= atom_size;
+ nr_full_blocks = size_full_blocks >> atom_bits;
+
+ conf->atom_size = atom_size;
+ conf->orig_size = orig_size;
+ conf->orig_offset = orig_offset;
+ conf->expanded_size = expanded_size;
+ conf->aligned_offset = aligned_offset;
+
+ conf->off_in_head = off_in_head;
+ conf->off_in_tail = off_in_tail;
+ conf->nr_full_blocks = nr_full_blocks;
+ conf->acount = acount;
+ /*
+ * Finally, calculate precise amount of
+ * "extra-bytes" that should be uptodated
+ * at the end.
+ * Only if RMW is expected.
+ */
+ if (off_in_tail && set_gap)
+ set_gap_at_end(frame, object, conf, dtype);
+}
+
+struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
+ [AES_CIPHER_ALG][XTS_CIPHER_MODE] =
+ { .atomic = _gf_true,
+ .should_pad = _gf_true,
+ .blkbits = AES_BLOCK_BITS,
+ .init = aes_xts_init,
+ .set_private = set_private_aes_xts,
+ .check_key = check_key_aes_xts,
+ .set_iv = set_iv_aes_xts,
+ .encrypt = encrypt_aes_xts
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
new file mode 100644
index 000000000..4a1d3bb5a
--- /dev/null
+++ b/xlators/encryption/crypt/src/keys.c
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/* Key hierarchy
+
+ +----------------+
+ | MASTER_VOL_KEY |
+ +-------+--------+
+ |
+ |
+ +----------------+----------------+
+ | | |
+ | | |
+ +-------+------+ +-------+-------+ +------+--------+
+ | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
+ +-------+------+ +---------------+ +---------------+
+ |
+ |
+ +-------+-------+
+ | NMTD_LINK_KEY |
+ +---------------+
+
+ */
+
+#if DEBUG_CRYPT
+static void check_prf_iters(uint32_t num_iters)
+{
+ if (num_iters == 0)
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad number of prf iterations : %d", num_iters);
+}
+#else
+#define check_prf_iters(num_iters) noop
+#endif /* DEBUG_CRYPT */
+
+unsigned char crypt_fake_oid[16] =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * derive key in the counter mode using
+ * sha256-based HMAC as PRF, see
+ * NIST Special Publication 800-108, 5.1)
+ */
+
+#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
+
+static int32_t kderive_init(struct kderive_context *ctx,
+ const unsigned char *pkey, /* parent key */
+ uint32_t pkey_size, /* parent key size */
+ const unsigned char *idctx, /* id-context */
+ uint32_t idctx_size,
+ crypt_key_type type /* type of child key */)
+{
+ unsigned char *pos;
+ uint32_t llen = strlen(crypt_keys[type].label);
+ /*
+ * Compoud the fixed input data for KDF:
+ * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
+ * NIST SP 800-108, 5.1
+ */
+ ctx->fid_len =
+ sizeof(uint32_t) +
+ llen +
+ 1 +
+ idctx_size +
+ sizeof(uint32_t);
+
+ ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
+ if (!ctx->fid)
+ return ENOMEM;
+ ctx->out_len = round_up(crypt_keys[type].len >> 3,
+ PRF_OUTPUT_SIZE);
+ ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
+ if (!ctx->out) {
+ GF_FREE(ctx->fid);
+ return ENOMEM;
+ }
+ ctx->pkey = pkey;
+ ctx->pkey_len = pkey_size;
+ ctx->ckey_len = crypt_keys[type].len;
+
+ pos = ctx->fid;
+
+ /* counter will be set up in kderive_rfn() */
+ pos += sizeof(uint32_t);
+
+ memcpy(pos, crypt_keys[type].label, llen);
+ pos += llen;
+
+ /* set up zero octet */
+ *pos = 0;
+ pos += 1;
+
+ memcpy(pos, idctx, idctx_size);
+ pos += idctx_size;
+
+ *((uint32_t *)pos) = htobe32(ctx->ckey_len);
+
+ return 0;
+}
+
+static void kderive_update(struct kderive_context *ctx)
+{
+ uint32_t i;
+ HMAC_CTX hctx;
+ unsigned char *pos = ctx->out;
+ uint32_t *p_iter = (uint32_t *)ctx->fid;
+ uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
+
+ check_prf_iters(num_iters);
+
+ HMAC_CTX_init(&hctx);
+ for (i = 0; i < num_iters; i++) {
+ /*
+ * update the iteration number in the fid
+ */
+ *p_iter = htobe32(i);
+ HMAC_Init_ex(&hctx,
+ ctx->pkey, ctx->pkey_len >> 3,
+ EVP_sha256(),
+ NULL);
+ HMAC_Update(&hctx, ctx->fid, ctx->fid_len);
+ HMAC_Final(&hctx, pos, NULL);
+
+ pos += PRF_OUTPUT_SIZE;
+ }
+ HMAC_CTX_cleanup(&hctx);
+}
+
+static void kderive_final(struct kderive_context *ctx, unsigned char *child)
+{
+ memcpy(child, ctx->out, ctx->ckey_len >> 3);
+ GF_FREE(ctx->fid);
+ GF_FREE(ctx->out);
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * derive per-volume key for object ids aithentication
+ */
+int32_t get_nmtd_vol_key(struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ crypt_fake_oid, sizeof(uuid_t), NMTD_VOL_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, master->m_nmtd_key);
+ return 0;
+}
+
+/*
+ * derive per-link key for aithentication of non-encrypted
+ * meta-data (nmtd)
+ */
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_nmtd_key,
+ nmtd_vol_key_size(),
+ (const unsigned char *)loc->path,
+ strlen(loc->path), NMTD_LINK_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+/*
+ * derive per-file key for encryption and authentication
+ * of encrypted part of metadata (emtd)
+ */
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), EMTD_FILE_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+static int32_t data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
+{
+ int32_t ret = 0;
+ switch (keysize) {
+ case 256:
+ *type = DATA_FILE_KEY_256;
+ break;
+ case 512:
+ *type = DATA_FILE_KEY_512;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
+ keysize);
+ ret = ENOTSUP;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * derive per-file key for data encryption
+ */
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+ crypt_key_type type;
+
+ ret = data_key_type_by_size(keysize, &type);
+ if (ret)
+ return ret;
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), type);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, key);
+ return 0;
+}
+
+/*
+ * NOTE: Don't change existing keys: it will break compatibility;
+ */
+struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
+ [MASTER_VOL_KEY] =
+ { .len = MASTER_VOL_KEY_SIZE << 3,
+ .label = "volume-master",
+ },
+ [NMTD_VOL_KEY] =
+ { .len = NMTD_VOL_KEY_SIZE << 3,
+ .label = "volume-nmtd-key-generation"
+ },
+ [NMTD_LINK_KEY] =
+ { .len = 128,
+ .label = "link-nmtd-authentication"
+ },
+ [EMTD_FILE_KEY] =
+ { .len = 128,
+ .label = "file-emtd-encryption-and-auth"
+ },
+ [DATA_FILE_KEY_256] =
+ { .len = 256,
+ .label = "file-data-encryption-256"
+ },
+ [DATA_FILE_KEY_512] =
+ { .len = 512,
+ .label = "file-data-encryption-512"
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
new file mode 100644
index 000000000..36b14c055
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.c
@@ -0,0 +1,605 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include "metadata.h"
+
+int32_t alloc_format(crypt_local_t *local, size_t size)
+{
+ if (size > 0) {
+ local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
+ if (!local->format)
+ return ENOMEM;
+ }
+ local->format_size = size;
+ return 0;
+}
+
+int32_t alloc_format_create(crypt_local_t *local)
+{
+ return alloc_format(local, new_format_size());
+}
+
+void free_format(crypt_local_t *local)
+{
+ GF_FREE(local->format);
+}
+
+/*
+ * Check compatibility with extracted metadata
+ */
+static int32_t check_file_metadata(struct crypt_inode_info *info)
+{
+ struct object_cipher_info *object = &info->cinfo;
+
+ if (info->nr_minor != CRYPT_XLATOR_ID) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported minor subversion %d", info->nr_minor);
+ return EINVAL;
+ }
+ if (object->o_alg > LAST_CIPHER_ALG) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher algorithm %d",
+ object->o_alg);
+ return EINVAL;
+ }
+ if (object->o_mode > LAST_CIPHER_MODE) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher mode %d",
+ object->o_mode);
+ return EINVAL;
+ }
+ if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
+ object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
+ gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
+ object->o_block_bits);
+ return EINVAL;
+ }
+ /* TBD: check data key size */
+ return 0;
+}
+
+static size_t format_size_v1(mtd_op_t op, size_t old_size)
+{
+
+ switch (op) {
+ case MTD_CREATE:
+ return sizeof(struct mtd_format_v1);
+ case MTD_OVERWRITE:
+ return old_size;
+ case MTD_APPEND:
+ return old_size + NMTD_8_MAC_SIZE;
+ case MTD_CUT:
+ if (old_size > sizeof(struct mtd_format_v1))
+ return old_size - NMTD_8_MAC_SIZE;
+ else
+ return 0;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
+ return 0;
+ }
+}
+
+/*
+ * Calculate size of the updated format string.
+ * Returned zero means that we don't need to update the format string.
+ */
+size_t format_size(mtd_op_t op, size_t old_size)
+{
+ size_t versioned;
+
+ versioned = mtd_loaders[current_mtd_loader()].format_size(op,
+ old_size - sizeof(struct crypt_format));
+ if (versioned != 0)
+ return versioned + sizeof(struct crypt_format);
+ return 0;
+}
+
+/*
+ * size of the format string of newly created file (nr_links = 1)
+ */
+size_t new_format_size(void)
+{
+ return format_size(MTD_CREATE, 0);
+}
+
+/*
+ * Calculate per-link MAC by pathname
+ */
+static int32_t calc_link_mac_v1(struct mtd_format_v1 *fmt,
+ loc_t *loc,
+ unsigned char *result,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char nmtd_link_key[16];
+ CMAC_CTX *cctx;
+ size_t len;
+
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
+ return -1;
+ }
+ cctx = CMAC_CTX_new();
+ if (!cctx) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
+ return -1;
+ }
+ ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
+ EVP_aes_128_cbc(), 0);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Final(cctx, result, &len);
+ CMAC_CTX_free(cctx);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create per-link MAC of index @idx by pathname
+ */
+static int32_t create_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
+ return 0;
+}
+
+static int32_t create_format_v1(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ unsigned char nmtd_link_key[16];
+ uint32_t ad;
+ GCM128_CONTEXT *gctx;
+
+ fmt = (struct mtd_format_v1 *)wire;
+
+ fmt->minor_id = info->nr_minor;
+ fmt->alg_id = AES_CIPHER_ALG;
+ fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
+ fmt->block_bits = master->m_block_bits;
+ fmt->mode_id = master->m_mode;
+ /*
+ * retrieve keys for the parts of metadata
+ */
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret)
+ return ret;
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret)
+ return ret;
+
+ AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+
+ /* TBD: Check return values */
+
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_encrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * set MAC of encrypted part of metadata
+ */
+ CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
+ CRYPTO_gcm128_release(gctx);
+ /*
+ * set the first MAC of non-encrypted part of metadata
+ */
+ return create_link_mac_v1(fmt, 0, loc, info, master);
+}
+
+/*
+ * Called by fops:
+ * ->create();
+ * ->link();
+ *
+ * Pack common and version-specific parts of file's metadata
+ * Pre-conditions: @info contains valid object-id.
+ */
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ struct crypt_format *fmt = (struct crypt_format *)wire;
+
+ fmt->loader_id = current_mtd_loader();
+
+ wire += sizeof(struct crypt_format);
+ return mtd_loaders[current_mtd_loader()].create_format(wire, loc,
+ info, master);
+}
+
+/*
+ * Append or overwrite per-link mac of @mac_idx index
+ * in accordance with the new pathname
+ */
+int32_t appov_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new, old, old_size);
+ return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx,
+ loc, info, master);
+}
+
+/*
+ * Cut per-link mac of @mac_idx index
+ */
+static int32_t cut_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new,
+ old,
+ sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
+
+ memcpy(new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1),
+ old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
+ old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
+ return 0;
+}
+
+int32_t update_format_v1(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx, /* of old name */
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ switch (op) {
+ case MTD_APPEND:
+ mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1))/8;
+ case MTD_OVERWRITE:
+ return appov_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ case MTD_CUT:
+ return cut_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
+ return -1;
+ }
+}
+
+/*
+ * Called by fops:
+ *
+ * ->link()
+ * ->unlink()
+ * ->rename()
+ *
+ */
+int32_t update_format(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ if (!new)
+ return 0;
+ memcpy(new, old, sizeof(struct crypt_format));
+
+ old += sizeof(struct crypt_format);
+ new += sizeof(struct crypt_format);
+ old_len -= sizeof(struct crypt_format);
+
+ return mtd_loaders[current_mtd_loader()].update_format(new, old,
+ old_len,
+ mac_idx, op,
+ loc, info,
+ master, local);
+}
+
+/*
+ * Perform preliminary checks of found metadata
+ * Return < 0 on errors;
+ * Return number of object-id MACs (>= 1) on success
+ */
+int32_t check_format_v1(uint32_t len, unsigned char *wire)
+{
+ uint32_t nr_links;
+
+ if (len < sizeof(struct mtd_format_v1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata size %d", len);
+ goto error;
+ }
+ len -= sizeof(struct mtd_format_v1);
+ if (len % sizeof(nmtd_8_mac_t)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata format");
+ goto error;
+ }
+ nr_links = 1 + len / sizeof(nmtd_8_mac_t);
+ if (nr_links > _POSIX_LINK_MAX)
+ goto error;
+ return nr_links;
+ error:
+ return EIO;
+}
+
+/*
+ * Verify per-link MAC specified by index @idx
+ *
+ * return:
+ * -1 on errors;
+ * 0 on failed verification;
+ * 1 on sucessful verification
+ */
+static int32_t verify_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx /* index of the mac to verify */,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
+ return 0;
+ return 1;
+}
+
+/*
+ * Lookup per-link MAC by pathname.
+ *
+ * return index of the MAC, if it was found;
+ * return < 0 on errors, or if the MAC wasn't found
+ */
+static int32_t lookup_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t nr_macs,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ uint32_t idx;
+
+ for (idx = 0; idx < nr_macs; idx++) {
+ ret = verify_link_mac_v1(fmt, idx, loc, info, master);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return idx;
+ }
+ return -ENOENT;
+}
+
+/*
+ * Extract version-specific part of metadata
+ */
+static int32_t open_format_v1(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ int32_t ret;
+ int32_t num_nmtd_macs;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ GCM128_CONTEXT *gctx;
+ uint32_t ad;
+ emtd_8_mac_t gmac;
+ struct object_cipher_info *object;
+
+ num_nmtd_macs = check_format_v1(len, wire);
+ if (num_nmtd_macs <= 0)
+ return EIO;
+ fmt = (struct mtd_format_v1 *)wire;
+
+ ret = lookup_link_mac_v1(fmt, num_nmtd_macs, loc, info, master);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
+ return EINVAL;
+ }
+ local->mac_idx = ret;
+ if (load_info == _gf_false)
+ /* the case of partial open */
+ return 0;
+
+ object = &info->cinfo;
+
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
+ return ret;
+ }
+ /*
+ * decrypt encrypted meta-data
+ */
+ ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+ return ret;
+ }
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+ if (!gctx) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+ return ENOMEM;
+ }
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_decrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * verify metadata
+ */
+ CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
+ CRYPTO_gcm128_release(gctx);
+ if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
+ gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
+ return EINVAL;
+ }
+ /*
+ * load verified metadata to the private part of inode
+ */
+ info->nr_minor = fmt->minor_id;
+
+ object->o_alg = fmt->alg_id;
+ object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
+ object->o_block_bits = fmt->block_bits;
+ object->o_mode = fmt->mode_id;
+
+ return check_file_metadata(info);
+}
+
+/*
+ * perform metadata authentication against @loc->path;
+ * extract crypt-specific attribtes and populate @info
+ * with them (optional)
+ */
+int32_t open_format(unsigned char *str,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ struct crypt_format *fmt;
+ if (len < sizeof(*fmt)) {
+ gf_log("crypt", GF_LOG_ERROR, "Bad core format");
+ return EIO;
+ }
+ fmt = (struct crypt_format *)str;
+
+ if (fmt->loader_id >= LAST_MTD_LOADER) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Unsupported loader id %d", fmt->loader_id);
+ return EINVAL;
+ }
+ str += sizeof(*fmt);
+ len -= sizeof(*fmt);
+
+ return mtd_loaders[fmt->loader_id].open_format(str,
+ len,
+ loc,
+ info,
+ master,
+ local,
+ load_info);
+}
+
+struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER] = {
+ [MTD_LOADER_V1] =
+ {.format_size = format_size_v1,
+ .create_format = create_format_v1,
+ .open_format = open_format_v1,
+ .update_format = update_format_v1
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
new file mode 100644
index 000000000..a92f149ef
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __METADATA_H__
+#define __METADATA_H__
+
+#define NMTD_8_MAC_SIZE (8)
+#define EMTD_8_MAC_SIZE (8)
+
+typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
+typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE] ;
+
+/*
+ * Version "v1" of file's metadata.
+ * Metadata of this version has 4 components:
+ *
+ * 1) EMTD (Encrypted part of MeTaData);
+ * 2) NMTD (Non-encrypted part of MeTaData);
+ * 3) EMTD_MAC; (EMTD Message Authentication Code);
+ * 4) Array of per-link NMTD MACs (for every (hard)link it includes
+ * exactly one MAC)
+ */
+struct mtd_format_v1 {
+ /* EMTD, encrypted part of meta-data */
+ uint8_t alg_id; /* cipher algorithm id (only AES for now) */
+ uint8_t mode_id; /* cipher mode id; (only XTS for now) */
+ uint8_t block_bits; /* encoded block size */
+ uint8_t minor_id; /* client translator id */
+ uint8_t dkey_factor; /* encoded size of the data key */
+ /* MACs */
+ emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
+ nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
+ * meta-data: at least one such MAC is always
+ * present */
+} __attribute__((packed));
+
+/*
+ * NMTD, the non-encrypted part of metadata of version "v1"
+ * is file's gfid, which is generated on trusted machines.
+ */
+#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
+#define SIZE_OF_EMTD_V1 (offsetof(struct mtd_format_v1, gmac) - \
+ offsetof(struct mtd_format_v1, alg_id))
+#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
+#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
+
+static inline unsigned char *get_EMTD_V1(struct mtd_format_v1 *format)
+{
+ return &format->alg_id;
+}
+
+static inline unsigned char *get_NMTD_V1(struct crypt_inode_info *info)
+{
+ return info->oid;
+}
+
+static inline unsigned char *get_EMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->gmac;
+}
+
+static inline unsigned char *get_NMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->omac;
+}
+
+#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
index ba5e623d8..94e8d18e7 100644
--- a/xlators/encryption/rot-13/src/Makefile.am
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = rot-13.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-rot_13_la_LDFLAGS = -module -avoidversion
+rot_13_la_LDFLAGS = -module -avoid-version
rot_13_la_SOURCES = rot-13.c
rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rot-13.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
index 3cf925ed8..1bcfe0192 100644
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <ctype.h>
#include <sys/uio.h>
@@ -32,13 +22,13 @@
#include "rot-13.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, NOT FOR PRODUCTION
* USE ;) (hence no error-checking)
*/
-void
+void
rot13 (char *buf, int len)
{
int i;
@@ -68,14 +58,15 @@ rot13_readv_cbk (call_frame_t *frame,
struct iovec *vector,
int32_t count,
struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
+
if (priv->decrypt_read)
rot13_iovec (vector, count);
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -84,13 +75,13 @@ rot13_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame,
rot13_readv_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -101,9 +92,10 @@ rot13_writev_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -112,20 +104,20 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
if (priv->encrypt_write)
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset,
- iobref);
+ fd, vector, count, offset, flags,
+ iobref, xdata);
return 0;
}
@@ -136,7 +128,7 @@ init (xlator_t *this)
rot_13_private_t *priv = NULL;
if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -158,6 +150,7 @@ init (xlator_t *this)
if (gf_string2boolean (data->data, &priv->encrypt_write) == -1) {
gf_log (this->name, GF_LOG_ERROR,
"encrypt-write takes only boolean options");
+ GF_FREE (priv);
return -1;
}
}
@@ -167,6 +160,7 @@ init (xlator_t *this)
if (gf_string2boolean (data->data, &priv->decrypt_read) == -1) {
gf_log (this->name, GF_LOG_ERROR,
"decrypt-read takes only boolean options");
+ GF_FREE (priv);
return -1;
}
}
@@ -194,15 +188,14 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"encrypt-write"},
+ { .key = {"encrypt-write"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
},
{ .key = {NULL} },
};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
index 8ef8162ae..3e9fc19c7 100644
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ROT_13_H__
#define __ROT_13_H__
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 809bbe510..d2f5ef192 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = locks trash quota read-only mac-compat quiesce marker#path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index \
+ protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter
CLEANFILES =
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 000000000..153bb6850
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/xlators/storage/bdb/Makefile.am b/xlators/features/changelog/lib/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/storage/bdb/Makefile.am
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 000000000..6d0d0357d
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+int
+main (int argc, char ** argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {0,};
+
+ /* get changes for brick "/home/vshankar/export/yow/yow-1" */
+ ret = gf_changelog_register ("/home/vshankar/exports/yow/yow-1",
+ "/tmp/scratch", "/tmp/change.log", 9, 5);
+ if (ret) {
+ handle_error ("register failed");
+ goto out;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan ();
+ if (nr_changes < 0) {
+ handle_error ("scan(): ");
+ break;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ printf ("Got %ld changelog files\n", nr_changes);
+
+ while ( (changes =
+ gf_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+ printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done (fbuf);
+ if (ret)
+ handle_error ("gf_changelog_done");
+ }
+
+ if (changes == -1)
+ handle_error ("gf_changelog_next_change");
+
+ next:
+ sleep (10);
+ }
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100644
index 000000000..d21db8eab
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+ change_list = []
+ try:
+ cl.cl_register(brick, scratch_dir, log_file, log_level)
+ while True:
+ cl.cl_scan()
+ change_list = cl.cl_getchanges()
+ if change_list:
+ print change_list
+ for change in change_list:
+ print('done with %s' % (change))
+ cl.cl_done(change)
+ time.sleep(interval)
+ except OSError:
+ ex = sys.exc_info()[1]
+ print ex
+
+if __name__ == '__main__':
+ if len(sys.argv) != 5:
+ print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+ % (sys.argv[0]))
+ sys.exit(1)
+ get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+ libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break;
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 000000000..775f026cf
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,37 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) -version-info $(LIBGFCHANGELOG_LT_VERSION)
+
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \
+ gf-changelog-helpers.c $(CONTRIBDIR)/uuid/clear.c \
+ $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \
+ $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \
+ $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \
+ $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c \
+ $(CONTRIBDIR)/uuid/unpack.c
+
+noinst_HEADERS = gf-changelog-helpers.h $(CONTRIBDIR)/uuid/uuidd.h \
+ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+libgfchangelog_HEADERS = changelog.h
+
+CLEANFILES =
+CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h
new file mode 100644
index 000000000..5cddfb583
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+/* API set */
+
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan ();
+
+int
+gf_changelog_start_fresh ();
+
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done (char *file);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 000000000..1eef8bf04
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+
+ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize)
+{
+ return read (fd, buffer, bufsize);
+}
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return writen;
+}
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr)
+{
+ for (; *s; s++) {
+ if (estr[*s])
+ sprintf(enc, "%c", estr[*s]);
+ else
+ sprintf(enc, "%%%02X", *s);
+ while (*++enc);
+ }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream open functions does have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implmentation still does work with more than one fd's
+ * used to perform gf_readline(). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static pthread_key_t rl_key;
+static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+
+static void
+readline_destructor (void *ptr)
+{
+ GF_FREE (ptr);
+}
+
+static void
+readline_once (void)
+{
+ pthread_key_create (&rl_key, readline_destructor);
+}
+
+static ssize_t
+my_read (read_line_t *tsd, int fd, char *ptr)
+{
+ if (tsd->rl_cnt <= 0) {
+ if ( (tsd->rl_cnt = read (fd, tsd->rl_buf, MAXLINE)) < 0 )
+ return -1;
+ else if (tsd->rl_cnt == 0)
+ return 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+ }
+
+ tsd->rl_cnt--;
+ *ptr = *tsd->rl_bufptr++;
+ return 1;
+}
+
+static int
+gf_readline_init_once (read_line_t **tsd)
+{
+ if (pthread_once (&rl_once, readline_once) != 0)
+ return -1;
+
+ *tsd = pthread_getspecific (rl_key);
+ if (*tsd)
+ goto out;
+
+ *tsd = GF_CALLOC (1, sizeof (**tsd),
+ gf_changelog_mt_libgfchangelog_rl_t);
+ if (!*tsd)
+ return -1;
+
+ if (pthread_setspecific (rl_key, *tsd) != 0)
+ return -1;
+
+ out:
+ return 0;
+}
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen)
+{
+ size_t n = 0;
+ size_t rc = 0;
+ char c = ' ';
+ char *ptr = NULL;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ ptr = vptr;
+ for (n = 1; n < maxlen; n++) {
+ if ( (rc = my_read (tsd, fd, &c)) == 1 ) {
+ *ptr++ = c;
+ if (c == '\n')
+ break;
+ } else if (rc == 0) {
+ *ptr = '\0';
+ return (n - 1);
+ } else
+ return -1;
+ }
+
+ *ptr = '\0';
+ return n;
+
+}
+
+off_t
+gf_lseek (int fd, off_t offset, int whence)
+{
+ off_t off = 0;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if ( (off = lseek (fd, offset, whence)) == -1)
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return off;
+}
+
+int
+gf_ftruncate (int fd, off_t length)
+{
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if (ftruncate (fd, 0))
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return 0;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 000000000..7e13d9376
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include <xlator.h>
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \
+ memcpy (ascii + off, ptr, len); \
+ off += len; \
+ } while (0)
+
+typedef struct read_line {
+ int rl_cnt;
+ char *rl_bufptr;
+ char rl_buf[MAXLINE];
+} read_line_t;
+
+typedef struct gf_changelog {
+ xlator_t *this;
+
+ /* 'processing' directory stream */
+ DIR *gfc_dir;
+
+ /* fd to the tracker file */
+ int gfc_fd;
+
+ /* connection retries */
+ int gfc_connretries;
+
+ char gfc_sockpath[UNIX_PATH_MAX];
+
+ char gfc_brickpath[PATH_MAX];
+
+ /* socket for recieving notifications */
+ int gfc_sockfd;
+
+ char *gfc_working_dir;
+
+ /* RFC 3986 string encoding */
+ char rfc3986[256];
+
+ char gfc_current_dir[PATH_MAX];
+ char gfc_processed_dir[PATH_MAX];
+ char gfc_processing_dir[PATH_MAX];
+
+ pthread_t gfc_changelog_processor;
+} gf_changelog_t;
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc);
+
+void *
+gf_changelog_process (void *data);
+
+ssize_t
+gf_changelog_read_path (int fd, char *buffer, size_t bufsize);
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate (int fd, off_t length);
+
+off_t
+gf_lseek (int fd, off_t offset, int whence);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c
new file mode 100644
index 000000000..3ea2700c6
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-process.c
@@ -0,0 +1,618 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+
+extern int byebye;
+
+/**
+ * number of gfid records after fop number
+ */
+int nr_gfids[] = {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+};
+
+int nr_extra_recs[] = {
+ [GF_FOP_MKNOD] = 3,
+ [GF_FOP_MKDIR] = 3,
+ [GF_FOP_UNLINK] = 0,
+ [GF_FOP_RMDIR] = 0,
+ [GF_FOP_SYMLINK] = 0,
+ [GF_FOP_RENAME] = 0,
+ [GF_FOP_LINK] = 0,
+ [GF_FOP_CREATE] = 3,
+};
+
+static char *
+binary_to_ascii (uuid_t uuid)
+{
+ return uuid_utoa (uuid);
+}
+
+static char *
+conv_noop (char *ptr) { return ptr; }
+
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ } \
+
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR (mov, le, perr); \
+ ptr = fn (mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \
+ MOVER_MOVE (mo, nl, le); \
+ }
+
+
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy (uuid, mover, sizeof (uuid_t)); \
+ ptr = binary_to_ascii (uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \
+ } \
+
+#define LINE_BUFSIZE 3*PATH_MAX /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result is additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit extirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+static int
+gf_changelog_parse_binary (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+
+{
+ int ret = -1;
+ off_t off = 0;
+ off_t nleft = 0;
+ uuid_t uuid = {0,};
+ char *ptr = NULL;
+ char *bname_start = NULL;
+ char *bname_end = NULL;
+ char *mover = NULL;
+ char *start = NULL;
+ char current_mover = ' ';
+ size_t blen = 0;
+ int parse_err = 0;
+ char ascii[LINE_BUFSIZE] = {0,};
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+
+ off = blen = 0;
+ ptr = bname_start = bname_end = NULL;
+
+ current_mover = *mover;
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ bname_start = mover;
+ if ( (bname_end = strchr (mover, '\n')) == NULL ) {
+ parse_err = 1;
+ break;
+ }
+
+ blen = bname_end - bname_start;
+ MOVER_MOVE (mover, nleft, blen);
+
+ break;
+
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr));
+ if (blen)
+ GF_CHANGELOG_FILL_BUFFER (bname_start,
+ ascii, off, blen);
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing binary changelog failed due to "
+ " error in writing ascii change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+ out:
+ return ret;
+}
+
+/**
+ * ascii decoder:
+ * - separate out one entry from another
+ * - use fop name rather than fop number
+ */
+static int
+gf_changelog_parse_ascii (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+{
+ int ng = 0;
+ int ret = -1;
+ int fop = 0;
+ int len = 0;
+ off_t off = 0;
+ off_t nleft = 0;
+ char *ptr = NULL;
+ char *eptr = NULL;
+ char *start = NULL;
+ char *mover = NULL;
+ int parse_err = 0;
+ char current_mover = ' ';
+ char ascii[LINE_BUFSIZE] = {0,};
+ const char *fopname = NULL;
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+ off = 0;
+ current_mover = *mover;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ switch (current_mover) {
+ case 'D':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE(ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ break;
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE (ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE (" ", ascii, off, mover, nleft, 1);
+
+ /* fop */
+ len = strlen (mover);
+ VERIFY_SEPARATOR (mover, len, parse_err);
+
+ fop = atoi (mover);
+ if ( (fopname = gf_fop_list[fop]) == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, len);
+
+ len = strlen (fopname);
+ GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE (ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE (" ", ascii, off,
+ mover, nleft, 1);
+
+ /* fop */
+ len = strlen (mover);
+ VERIFY_SEPARATOR (mover, len, parse_err);
+
+ fop = atoi (mover);
+ if ( (fopname = gf_fop_list[fop]) == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, len);
+
+ len = strlen (fopname);
+ GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+ ng = nr_extra_recs[fop];
+ for (;ng > 0; ng--) {
+ MOVER_MOVE (mover, nleft, 1);
+ len = strlen (mover);
+ VERIFY_SEPARATOR (mover, len, parse_err);
+
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+ FILL_AND_MOVE (mover, ascii,
+ off, mover, nleft, len);
+ }
+
+ /* pargfid + bname */
+ ng = nr_gfids[fop];
+ while (ng-- > 0) {
+ MOVER_MOVE (mover, nleft, 1);
+ len = strlen (mover);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ PARSE_GFID (mover, ptr, len,
+ conv_noop, parse_err);
+ eptr = calloc (3, strlen (ptr));
+ if (!eptr) {
+ parse_err = 1;
+ break;
+ }
+
+ gf_rfc3986_encode ((unsigned char *) ptr,
+ eptr, gfc->rfc3986);
+ FILL_AND_MOVE (eptr, ascii, off,
+ mover, nleft, len);
+ free (eptr);
+ }
+
+ break;
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing ascii changelog failed due to "
+ " error in writing change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+
+ out:
+ return ret;
+}
+
+#define COPY_BUFSIZE 8192
+static int
+gf_changelog_copy (xlator_t *this, int from_fd, int to_fd)
+{
+ ssize_t size = 0;
+ char buffer[COPY_BUFSIZE+1] = {0,};
+
+ while (1) {
+ size = read (from_fd, buffer, COPY_BUFSIZE);
+ if (size <= 0)
+ break;
+
+ if (gf_changelog_write (to_fd,
+ buffer, size) != size) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error processing ascii changlog");
+ size = -1;
+ break;
+ }
+ }
+
+ return (size < 0 ? -1 : 0);
+}
+
+static int
+gf_changelog_decode (xlator_t *this, gf_changelog_t *gfc, int from_fd,
+ int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ size_t elen = 0;
+ char buffer[1024] = {0,};
+
+ CHANGELOG_GET_ENCODING (from_fd, buffer, 1024, encoding, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING (encoding))
+ goto out;
+
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ /**
+ * start processing after the header
+ */
+ lseek (from_fd, elen, SEEK_SET);
+
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+ default:
+ ret = gf_changelog_copy (this, from_fd, to_fd);
+ }
+
+ out:
+ return ret;
+}
+
+static int
+gf_changelog_consume (xlator_t *this, gf_changelog_t *gfc, char *from_path)
+{
+ int ret = -1;
+ int fd1 = 0;
+ int fd2 = 0;
+ int zerob = 0;
+ struct stat stbuf = {0,};
+ char dest[PATH_MAX] = {0,};
+ char to_path[PATH_MAX] = {0,};
+
+ ret = stat (from_path, &stbuf);
+ if (ret || !S_ISREG(stbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat failed on changelog file: %s", from_path);
+ goto out;
+ }
+
+ fd1 = open (from_path, O_RDONLY);
+ if (fd1 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot open changelog file: %s (reason: %s)",
+ from_path, strerror (errno));
+ goto out;
+ }
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_current_dir, basename (from_path));
+ (void) snprintf (dest, PATH_MAX, "%s%s",
+ gfc->gfc_processing_dir, basename (from_path));
+
+ fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd2 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot create ascii changelog file %s (reason %s)",
+ to_path, strerror (errno));
+ goto close_fd;
+ } else {
+ ret = gf_changelog_decode (this, gfc, fd1,
+ fd2, &stbuf, &zerob);
+
+ close (fd2);
+
+ if (!ret) {
+ /* move it to processing on a successfull
+ decode */
+ ret = rename (to_path, dest);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error moving %s to processing dir"
+ " (reason: %s)", to_path,
+ strerror (errno));
+ }
+
+ /* remove it from .current if it's an empty file */
+ if (zerob) {
+ ret = unlink (to_path);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not unlink %s (reason: %s",
+ to_path, strerror (errno));
+ }
+ }
+
+ close_fd:
+ close (fd1);
+
+ out:
+ return ret;
+}
+
+static char *
+gf_changelog_ext_change (xlator_t *this,
+ gf_changelog_t *gfc, char *path, size_t readlen)
+{
+ int alo = 0;
+ int ret = 0;
+ size_t len = 0;
+ char *buf = NULL;
+
+ buf = path;
+ while (len < readlen) {
+ if (*buf == '\0') {
+ alo = 1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "processing changelog: %s", path);
+ ret = gf_changelog_consume (this, gfc, path);
+ }
+
+ if (ret)
+ break;
+
+ len++; buf++;
+ if (alo) {
+ alo = 0;
+ path = buf;
+ }
+ }
+
+ return (ret) ? NULL : path;
+}
+
+void *
+gf_changelog_process (void *data)
+{
+ ssize_t len = 0;
+ ssize_t offlen = 0;
+ xlator_t *this = NULL;
+ char *sbuf = NULL;
+ gf_changelog_t *gfc = NULL;
+ char from_path[PATH_MAX] = {0,};
+
+ gfc = (gf_changelog_t *) data;
+ this = gfc->this;
+
+ pthread_detach (pthread_self());
+
+ for (;;) {
+ len = gf_changelog_read_path (gfc->gfc_sockfd,
+ from_path + offlen,
+ PATH_MAX - offlen);
+ if (len < 0)
+ continue; /* ignore it for now */
+
+ if (len == 0) { /* close() from the changelog translator */
+ gf_log (this->name, GF_LOG_INFO, "close from changelog"
+ " notification translator.");
+
+ if (gfc->gfc_connretries != 1) {
+ if (!gf_changelog_notification_init(this, gfc))
+ continue;
+ }
+
+ byebye = 1;
+ break;
+ }
+
+ len += offlen;
+ sbuf = gf_changelog_ext_change (this, gfc, from_path, len);
+ if (!sbuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not extract changelog filename");
+ continue;
+ }
+
+ offlen = 0;
+ if (sbuf != (from_path + len)) {
+ offlen = from_path + len - sbuf;
+ memmove (from_path, sbuf, offlen);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "byebye (%d) from processing thread...", byebye);
+ return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 000000000..4b2b25ad5
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,516 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+int byebye = 0;
+
+static void
+gf_changelog_cleanup (gf_changelog_t *gfc)
+{
+ /* socket */
+ if (gfc->gfc_sockfd != -1)
+ close (gfc->gfc_sockfd);
+ /* tracker fd */
+ if (gfc->gfc_fd != -1)
+ close (gfc->gfc_fd);
+ /* processing dir */
+ if (gfc->gfc_dir)
+ closedir (gfc->gfc_dir);
+
+ if (gfc->gfc_working_dir)
+ free (gfc->gfc_working_dir); /* allocated by realpath */
+}
+
+void
+__attribute__ ((constructor)) gf_changelog_ctor (void)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return;
+
+ if (glusterfs_globals_init (ctx)) {
+ free (ctx);
+ ctx = NULL;
+ return;
+ }
+
+ THIS->ctx = ctx;
+}
+
+void
+__attribute__ ((destructor)) gf_changelog_dtor (void)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ return;
+
+ ctx = this->ctx;
+ gfc = this->private;
+
+ if (gfc) {
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ }
+
+ if (ctx) {
+ pthread_mutex_destroy (&ctx->lock);
+ free (ctx);
+ ctx = NULL;
+ }
+}
+
+
+static int
+gf_changelog_open_dirs (gf_changelog_t *gfc)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ int tracker_fd = 0;
+ char tracker_path[PATH_MAX] = {0,};
+
+ (void) snprintf (gfc->gfc_current_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_CURRENT_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_current_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processed_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSED_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processed_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processing_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSING_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processing_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ dir = opendir (gfc->gfc_processing_dir);
+ if (!dir) {
+ gf_log ("", GF_LOG_ERROR,
+ "opendir() error [reason: %s]", strerror (errno));
+ goto out;
+ }
+
+ gfc->gfc_dir = dir;
+
+ (void) snprintf (tracker_path, PATH_MAX,
+ "%s/"GF_CHANGELOG_TRACKER, gfc->gfc_working_dir);
+
+ tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (tracker_fd < 0) {
+ closedir (gfc->gfc_dir);
+ ret = -1;
+ goto out;
+ }
+
+ gfc->gfc_fd = tracker_fd;
+ ret = 0;
+ out:
+ return ret;
+}
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc)
+{
+ int ret = 0;
+ int len = 0;
+ int tries = 0;
+ int sockfd = 0;
+ struct sockaddr_un remote;
+
+ this = gfc->this;
+
+ if (gfc->gfc_sockfd != -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reconnecting...");
+ close (gfc->gfc_sockfd);
+ }
+
+ sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath,
+ gfc->gfc_sockpath, UNIX_PATH_MAX);
+ gf_log (this->name, GF_LOG_INFO,
+ "connecting to changelog socket: %s (brick: %s)",
+ gfc->gfc_sockpath, gfc->gfc_brickpath);
+
+ remote.sun_family = AF_UNIX;
+ strcpy (remote.sun_path, gfc->gfc_sockpath);
+
+ len = strlen (remote.sun_path) + sizeof (remote.sun_family);
+
+ while (tries < gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "connection attempt %d/%d...",
+ tries + 1, gfc->gfc_connretries);
+
+ /* initiate a connect */
+ if (connect (sockfd, (struct sockaddr *) &remote, len) == 0) {
+ gfc->gfc_sockfd = sockfd;
+ break;
+ }
+
+ tries++;
+ sleep (2);
+ }
+
+ if (tries == gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not connect to changelog socket!"
+ " bailing out...");
+ close (sockfd);
+ ret = -1;
+ } else
+ gf_log (this->name, GF_LOG_INFO,
+ "connection successful");
+
+ out:
+ return ret;
+}
+
+int
+gf_changelog_done (char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char to_path[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (!file || !strlen (file))
+ goto out;
+
+ /* make sure 'file' is inside ->gfc_working_dir */
+ buffer = realpath (file, NULL);
+ if (!buffer)
+ goto out;
+
+ if (strncmp (gfc->gfc_working_dir,
+ buffer, strlen (gfc->gfc_working_dir)))
+ goto out;
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_processed_dir, basename (buffer));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "moving %s to processed directory", file);
+ ret = rename (buffer, to_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot move %s to %s (reason: %s)",
+ file, to_path, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (buffer)
+ free (buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the begining
+ */
+int
+gf_changelog_start_fresh ()
+{
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ errno = EINVAL;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (gf_ftruncate (gfc->gfc_fd, 0))
+ goto out;
+
+ return 0;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all chanelogs
+ * consumed.
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen)
+{
+ ssize_t size = 0;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ tracker_fd = gfc->gfc_fd;
+
+ size = gf_readline (tracker_fd, buffer, maxlen);
+ if (size < 0)
+ goto out;
+ if (size == 0)
+ return 0;
+
+ memcpy (bufptr, buffer, size - 1);
+ *(buffer + size) = '\0';
+
+ return size;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * calling this api multiple times (without calling gf_changlog_done())
+ * would result new changelogs(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ */
+ssize_t
+gf_changelog_scan ()
+{
+ int ret = 0;
+ int tracker_fd = 0;
+ size_t len = 0;
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_t *gfc = NULL;
+ struct dirent *entryp = NULL;
+ struct dirent *result = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ /**
+ * do we need to protect 'byebye' with locks? worst, the
+ * consumer would get notified during next scan().
+ */
+ if (byebye) {
+ errno = ECONNREFUSED;
+ goto out;
+ }
+
+ errno = EINVAL;
+
+ tracker_fd = gfc->gfc_fd;
+
+ if (gf_ftruncate (tracker_fd, 0))
+ goto out;
+
+ len = offsetof(struct dirent, d_name)
+ + pathconf(gfc->gfc_processing_dir, _PC_NAME_MAX) + 1;
+ entryp = GF_CALLOC (1, len,
+ gf_changelog_mt_libgfchangelog_dirent_t);
+ if (!entryp)
+ goto out;
+
+ rewinddir (gfc->gfc_dir);
+ while (1) {
+ ret = readdir_r (gfc->gfc_dir, entryp, &result);
+ if (ret || !result)
+ break;
+
+ if ( !strcmp (basename (entryp->d_name), ".")
+ || !strcmp (basename (entryp->d_name), "..") )
+ continue;
+
+ nr_entries++;
+
+ GF_CHANGELOG_FILL_BUFFER (gfc->gfc_processing_dir,
+ buffer, off,
+ strlen (gfc->gfc_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer,
+ off, strlen (entryp->d_name));
+ GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+ if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error writing changelog filename"
+ " to tracker file");
+ break;
+ }
+ off = 0;
+ }
+
+ GF_FREE (entryp);
+
+ if (!result) {
+ if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1)
+ return nr_entries;
+ }
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register() - register a client for updates.
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_level, int max_reconnects)
+{
+ int i = 0;
+ int ret = -1;
+ int errn = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this->ctx)
+ goto out;
+
+ errno = ENOMEM;
+
+ gfc = GF_CALLOC (1, sizeof (*gfc),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!gfc)
+ goto out;
+
+ gfc->this = this;
+
+ gfc->gfc_dir = NULL;
+ gfc->gfc_fd = gfc->gfc_sockfd = -1;
+
+ gfc->gfc_working_dir = realpath (scratch_dir, NULL);
+ if (!gfc->gfc_working_dir) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_changelog_open_dirs (gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create entries in scratch dir");
+ goto cleanup;
+ }
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (this->ctx, log_file, NULL))
+ goto cleanup;
+
+ gf_log_set_loglevel ((log_level == -1) ? GF_LOG_INFO :
+ log_level);
+
+ gfc->gfc_connretries = (max_reconnects <= 0) ? 1 : max_reconnects;
+ (void) strncpy (gfc->gfc_brickpath, brick_path, PATH_MAX);
+
+ ret = gf_changelog_notification_init (this, gfc);
+ if (ret) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_thread_create (&gfc->gfc_changelog_processor,
+ NULL, gf_changelog_process, gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "error creating changelog processor thread"
+ " new changes won't be recorded!!!");
+ goto cleanup;
+ }
+
+ for (; i < 256; i++) {
+ gfc->rfc3986[i] =
+ (isalnum(i) || i == '~' ||
+ i == '-' || i == '.' || i == '_') ? i : 0;
+ }
+
+ ret = 0;
+ this->private = gfc;
+
+ goto out;
+
+ cleanup:
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ this->private = NULL;
+ errno = errn;
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 000000000..54c21ac21
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,19 @@
+xlator_LTLIBRARIES = changelog.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+ changelog-misc.h changelog-encoders.h changelog-notifier.h
+
+changelog_la_LDFLAGS = -module -avoid-version
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-notifier.c
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \
+ -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 000000000..08626ee2f
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.c
@@ -0,0 +1,197 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "changelog-encoders.h"
+
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+ char *tmpbuf = NULL;
+ size_t bufsz = 0;
+ struct changelog_entry_fields *ce = NULL;
+
+ ce = (struct changelog_entry_fields *) data;
+
+ if (encode) {
+ tmpbuf = uuid_utoa (ce->cef_uuid);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, tmpbuf, strlen (tmpbuf));
+ } else {
+ CHANGELOG_FILL_BUFFER (buffer, bufsz,
+ ce->cef_uuid, sizeof (uuid_t));
+ }
+
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, "/", 1);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz,
+ ce->cef_bname, strlen (ce->cef_bname));
+ return bufsz;
+}
+
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+ char buf[10] = {0,};
+ size_t bufsz = 0;
+ glusterfs_fop_t fop = 0;
+
+ fop = *(glusterfs_fop_t *) data;
+
+ if (encode) {
+ (void) snprintf (buf, sizeof (buf), "%d", fop);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf));
+ } else
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, &fop, sizeof (fop));
+
+ return bufsz;
+}
+
+size_t
+number_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+ size_t bufsz = 0;
+ unsigned int nr = 0;
+ char buf[20] = {0,};
+
+ nr = *(unsigned int *) data;
+
+ if (encode) {
+ (void) snprintf (buf, sizeof (buf), "%u", nr);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf));
+ } else
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, &nr, sizeof (unsigned int));
+
+ return bufsz;
+}
+
+void
+entry_free_fn (void *data)
+{
+ changelog_opt_t *co = data;
+
+ if (!co)
+ return;
+
+ GF_FREE (co->co_entry.cef_bname);
+}
+
+/**
+ * try to write all data in one shot
+ */
+
+static inline void
+changelog_encode_write_xtra (changelog_log_data_t *cld,
+ char *buffer, size_t *off, gf_boolean_t encode)
+{
+ int i = 0;
+ size_t offset = 0;
+ void *data = NULL;
+ changelog_opt_t *co = NULL;
+
+ offset = *off;
+
+ co = (changelog_opt_t *) cld->cld_ptr;
+
+ for (; i < cld->cld_xtra_records; i++, co++) {
+ CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1);
+
+ switch (co->co_type) {
+ case CHANGELOG_OPT_REC_FOP:
+ data = &co->co_fop;
+ break;
+ case CHANGELOG_OPT_REC_ENTRY:
+ data = &co->co_entry;
+ break;
+ case CHANGELOG_OPT_REC_UINT32:
+ data = &co->co_uint32;
+ break;
+ }
+
+ if (co->co_convert)
+ offset += co->co_convert (data,
+ buffer + offset, encode);
+ else /* no coversion: write it out as it is */
+ CHANGELOG_FILL_BUFFER (buffer, offset,
+ data, co->co_len);
+ }
+
+ *off = offset;
+}
+
+int
+changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ size_t gfid_len = 0;
+ char *gfid_str = NULL;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gfid_str = uuid_utoa (cld->cld_gfid);
+ gfid_len = strlen (gfid_str);
+
+ /* extra bytes for decorations */
+ buffer = alloca (gfid_len + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_ASCII (priv, buffer,
+ off, gfid_str, gfid_len, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra (cld, buffer, &off, _gf_true);
+
+ CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+ return changelog_write_change (priv, buffer, off);
+}
+
+int
+changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ /* extra bytes for decorations */
+ buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra (cld, buffer, &off, _gf_false);
+
+ CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+ return changelog_write_change (priv, buffer, off);
+}
+
+static struct changelog_encoder
+cb_encoder[] = {
+ [CHANGELOG_ENCODE_BINARY] =
+ {
+ .encoder = CHANGELOG_ENCODE_BINARY,
+ .encode = changelog_encode_binary,
+ },
+ [CHANGELOG_ENCODE_ASCII] =
+ {
+ .encoder = CHANGELOG_ENCODE_ASCII,
+ .encode = changelog_encode_ascii,
+ },
+};
+
+void
+changelog_encode_change( changelog_priv_t * priv)
+{
+ priv->ce = &cb_encoder[priv->encode_mode];
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
new file mode 100644
index 000000000..c5dcc8a77
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_ENCODERS_H
+#define _CHANGELOG_ENCODERS_H
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "changelog-helpers.h"
+
+#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) do { \
+ CHANGELOG_FILL_BUFFER (buffer, off, \
+ priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER (buffer, \
+ off, gfid, gfid_len); \
+ } while (0)
+
+#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do { \
+ CHANGELOG_FILL_BUFFER (buffer, off, \
+ priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER (buffer, \
+ off, gfid, sizeof (uuid_t)); \
+ } while (0)
+
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+number_fn (void *data, char *buffer, gf_boolean_t encode);
+void
+entry_free_fn (void *data);
+int
+changelog_encode_binary (xlator_t *, changelog_log_data_t *);
+int
+changelog_encode_ascii (xlator_t *, changelog_log_data_t *);
+void
+changelog_encode_change(changelog_priv_t *);
+
+#endif /* _CHANGELOG_ENCODERS_H */
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
new file mode 100644
index 000000000..91c43a16c
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -0,0 +1,696 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-helpers.h"
+#include "changelog-mem-types.h"
+
+#include "changelog-encoders.h"
+#include <pthread.h>
+
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id)
+{
+ int ret = 0;
+ void *retval = NULL;
+
+ /* send a cancel request to the thread */
+ ret = pthread_cancel (thr_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not cancel thread (reason: %s)",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = pthread_join (thr_id, &retval);
+ if (ret || (retval != PTHREAD_CANCELED)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cancel request not adhered as expected"
+ " (reason: %s)", strerror (errno));
+ }
+
+ out:
+ return;
+}
+
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local)
+{
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return NULL;
+
+ cld = &local->cld;
+ if (!cld->cld_iobuf)
+ return NULL;
+
+ return cld->cld_iobuf->ptr;
+}
+
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+ size_t len, int xr)
+{
+ changelog_log_data_t *cld = NULL;
+
+ cld = &local->cld;
+
+ cld->cld_ptr_len = len;
+ cld->cld_xtra_records = xr;
+}
+
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local)
+{
+ int i = 0;
+ changelog_opt_t *co = NULL;
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return;
+
+ cld = &local->cld;
+
+ /* cleanup dynamic allocation for extra records */
+ if (cld->cld_xtra_records) {
+ co = (changelog_opt_t *) cld->cld_ptr;
+ for (; i < cld->cld_xtra_records; i++, co++)
+ if (co->co_free)
+ co->co_free (co);
+ }
+
+ CHANGELOG_IOBUF_UNREF (cld->cld_iobuf);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ mem_put (local);
+}
+
+inline int
+changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return (writen != len);
+}
+
+static int
+changelog_rollover_changelog (xlator_t *this,
+ changelog_priv_t *priv, unsigned long ts)
+{
+ int ret = -1;
+ int notify = 0;
+ char *bname = NULL;
+ char ofile[PATH_MAX] = {0,};
+ char nfile[PATH_MAX] = {0,};
+
+ if (priv->changelog_fd != -1) {
+ close (priv->changelog_fd);
+ priv->changelog_fd = -1;
+ }
+
+ (void) snprintf (ofile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir);
+ (void) snprintf (nfile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME".%lu",
+ priv->changelog_dir, ts);
+
+ ret = rename (ofile, nfile);
+ if (!ret)
+ notify = 1;
+
+ if (ret && (errno == ENOENT)) {
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error renaming %s -> %s (reason %s)",
+ ofile, nfile, strerror (errno));
+ }
+
+ if (notify) {
+ bname = basename (nfile);
+ gf_log (this->name, GF_LOG_DEBUG, "notifying: %s", bname);
+ ret = changelog_write (priv->wfd, bname, strlen (bname) + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send file name to notify thread"
+ " (reason: %s)", strerror (errno));
+ }
+ }
+
+ return ret;
+}
+
+int
+changelog_open (xlator_t *this,
+ changelog_priv_t *priv)
+{
+ int fd = 0;
+ int ret = -1;
+ int flags = 0;
+ char buffer[1024] = {0,};
+ char changelog_path[PATH_MAX] = {0,};
+
+ (void) snprintf (changelog_path, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME,
+ priv->changelog_dir);
+
+ flags |= (O_CREAT | O_RDWR);
+ if (priv->fsync_interval == 0)
+ flags |= O_SYNC;
+
+ fd = open (changelog_path, flags,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to open/create changelog file %s"
+ " (reason: %s). change-logging will be"
+ " inactive", changelog_path, strerror (errno));
+ goto out;
+ }
+
+ priv->changelog_fd = fd;
+
+ (void) snprintf (buffer, 1024, CHANGELOG_HEADER,
+ CHANGELOG_VERSION_MAJOR,
+ CHANGELOG_VERSION_MINOR,
+ priv->ce->encoder);
+ ret = changelog_write_change (priv, buffer, strlen (buffer));
+ if (ret) {
+ close (priv->changelog_fd);
+ priv->changelog_fd = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int
+changelog_start_next_change (xlator_t *this,
+ changelog_priv_t *priv,
+ unsigned long ts, gf_boolean_t finale)
+{
+ int ret = -1;
+
+ ret = changelog_rollover_changelog (this, priv, ts);
+
+ if (!ret && !finale)
+ ret = changelog_open (this, priv);
+
+ return ret;
+}
+
+/**
+ * return the length of entry
+ */
+inline size_t
+changelog_entry_length ()
+{
+ return sizeof (changelog_log_data_t);
+}
+
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last)
+{
+ struct timeval tv = {0,};
+
+ cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
+
+ if (gettimeofday (&tv, NULL))
+ return -1;
+
+ cld->cld_roll_time = (unsigned long) tv.tv_sec;
+ cld->cld_finale = is_last;
+ return 0;
+}
+
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len)
+{
+ return changelog_write (priv->changelog_fd, buffer, len);
+}
+
+inline int
+changelog_handle_change (xlator_t *this,
+ changelog_priv_t *priv, changelog_log_data_t *cld)
+{
+ int ret = 0;
+
+ if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) {
+ changelog_encode_change(priv);
+ ret = changelog_start_next_change (this, priv,
+ cld->cld_roll_time,
+ cld->cld_finale);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Problem rolling over changelog(s)");
+ goto out;
+ }
+
+ /**
+ * case when there is reconfigure done (disabling changelog) and there
+ * are still fops that have updates in prgress.
+ */
+ if (priv->changelog_fd == -1)
+ return 0;
+
+ if (CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) {
+ ret = fsync (priv->changelog_fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync failed (reason: %s)",
+ strerror (errno));
+ }
+ goto out;
+ }
+
+ ret = priv->ce->encode (this, cld);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error writing changelog to disk");
+ }
+
+ out:
+ return ret;
+}
+
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode,
+ uuid_t gfid, int xtra_records,
+ gf_boolean_t update_flag)
+{
+ changelog_local_t *local = NULL;
+ struct iobuf *iobuf = NULL;
+
+ /**
+ * We relax the presence of inode if @update_flag is true.
+ * The caller (implmentation of the fop) needs to be careful to
+ * not blindly use local->inode.
+ */
+ if (!update_flag && !inode) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "inode needed for version checking !!!");
+ goto out;
+ }
+
+ if (xtra_records) {
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ xtra_records * CHANGELOG_OPT_RECORD_LEN);
+ if (!iobuf)
+ goto out;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ CHANGELOG_IOBUF_UNREF (iobuf);
+ goto out;
+ }
+
+ local->update_no_check = update_flag;
+
+ uuid_copy (local->cld.cld_gfid, gfid);
+
+ local->cld.cld_iobuf = iobuf;
+ local->cld.cld_xtra_records = 0; /* set by the caller */
+
+ if (inode)
+ local->inode = inode_ref (inode);
+
+ out:
+ return local;
+}
+
+int
+changelog_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del (inode, this, &ctx_addr);
+ if (!ctx_addr)
+ return 0;
+
+ ctx = (changelog_inode_ctx_t *) (long) ctx_addr;
+ GF_FREE (ctx);
+
+ return 0;
+}
+
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL);
+}
+
+/**
+ * TODO: these threads have many thing in common (wake up after
+ * a certain time etc..). move them into separate routine.
+ */
+void *
+changelog_rollover (void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+ changelog_time_slice_t *slice = NULL;
+ changelog_priv_t *priv = data;
+
+ this = priv->cr.this;
+ slice = &priv->slice;
+
+ while (1) {
+ tv.tv_sec = priv->rollover_time;
+ tv.tv_usec = 0;
+
+ ret = select (0, NULL, NULL, NULL, &tv);
+ if (ret)
+ continue;
+
+ ret = changelog_fill_rollover_data (&cld, _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to fill rollover data");
+ continue;
+ }
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ return NULL;
+}
+
+void *
+changelog_fsync_thread (void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+ changelog_priv_t *priv = data;
+
+ this = priv->cf.this;
+ cld.cld_type = CHANGELOG_TYPE_FSYNC;
+
+ while (1) {
+ tv.tv_sec = priv->fsync_interval;
+ tv.tv_usec = 0;
+
+ ret = select (0, NULL, NULL, NULL, &tv);
+ if (ret)
+ continue;
+
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to inject fsync event");
+ }
+
+ return NULL;
+}
+
+/* macros for inode/changelog version checks */
+
+#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \
+ LOCK (&inode->lock); \
+ { \
+ LOCK (&priv->lock); \
+ { \
+ *iver = slice->changelog_version[type]; \
+ } \
+ UNLOCK (&priv->lock); \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \
+ LOCK (&priv->lock); \
+ { \
+ upd = (ver == slice->changelog_version[type]) \
+ ? _gf_false : _gf_true; \
+ } \
+ UNLOCK (&priv->lock); \
+ } while (0)
+
+static int
+__changelog_inode_ctx_set (xlator_t *this,
+ inode_t *inode, changelog_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t) ctx;
+ return __inode_ctx_set (inode, this, &ctx_addr);
+}
+
+/**
+ * one shot routine to get the address and the value of a inode version
+ * for a particular type.
+ */
+static changelog_inode_ctx_t *
+__changelog_inode_ctx_get (xlator_t *this,
+ inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (changelog_inode_ctx_t *) (long)ctx_addr;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_changelog_mt_inode_ctx_t);
+ if (!ctx)
+ goto out;
+
+ ret = __changelog_inode_ctx_set (this, inode, ctx);
+ if (ret) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+
+ out:
+ if (ctx && iver && version) {
+ *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type);
+ *version = **iver;
+ }
+
+ return ctx;
+}
+
+static changelog_inode_ctx_t *
+changelog_inode_ctx_get (xlator_t *this,
+ inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ changelog_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __changelog_inode_ctx_get (this,
+ inode, iver, version, type);
+ }
+ UNLOCK (&inode->lock);
+
+ return ctx;
+}
+
+/**
+ * This is the main update routine. Locking has been made granular so as to
+ * maximize parallelism of fops - I'll try to explain it below using execution
+ * timelines.
+ *
+ * Basically, the contention is between multiple execution threads of this
+ * routine and the roll-over thread. So, instead of having a big lock, we hold
+ * granular locks: inode->lock and priv->lock. Now I'll explain what happens
+ * when there is an update and a roll-over at just about the same time.
+ * NOTE:
+ * - the dispatcher itself synchronizes updates via it's own lock
+ * - the slice version in incremented by the roll-over thread
+ *
+ * Case 1: When the rollover thread wins before the inode version can be
+ * compared with the slice version.
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 1, 1, 1> |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 1 <-> S: 2 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * |
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Therefore, the change gets recorded in the next change (no lost change). If
+ * the slice version was ahead of the inode version (say I:1, S: 2), then
+ * anyway the comparison would result in a update (I: 1, S: 3).
+ *
+ * If the rollover time is too less, then there is another contention when the
+ * updater tries to bring up inode version to the slice version (this is also
+ * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE.
+ *
+ * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2>
+ * |
+ * |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 3, 3, 3>
+ * | UNLOCK (&priv->lock)
+ *
+ *
+ * Case 2: When the fop thread wins
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 0, 0, 0> |
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 0 <-> S: 1 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 0, 0> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
+ * then there is no need to record an update (as the equality of the two version
+ * signifies an update was recorded in the current time slice).
+ */
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type)
+{
+ int ret = 0;
+ unsigned long *iver = NULL;
+ unsigned long version = 0;
+ inode_t *inode = NULL;
+ changelog_time_slice_t *slice = NULL;
+ changelog_inode_ctx_t *ctx = NULL;
+ changelog_log_data_t *cld_0 = NULL;
+ changelog_log_data_t *cld_1 = NULL;
+ changelog_local_t *next_local = NULL;
+ gf_boolean_t need_upd = _gf_true;
+
+ slice = &priv->slice;
+
+ /**
+ * for fops that do not require inode version checking
+ */
+ if (local->update_no_check)
+ goto update;
+
+ inode = local->inode;
+
+ ctx = changelog_inode_ctx_get (this,
+ inode, &iver, &version, type);
+ if (!ctx)
+ goto update;
+
+ INODE_VERSION_EQUALS_SLICE (priv, version, slice, type, need_upd);
+
+ update:
+ if (need_upd) {
+ cld_0 = &local->cld;
+ cld_0->cld_type = type;
+
+ if ( (next_local = local->prev_entry) != NULL ) {
+ cld_1 = &next_local->cld;
+ cld_1->cld_type = type;
+ }
+
+ ret = priv->cd.dispatchfn (this, priv,
+ priv->cd.cd_data, cld_0, cld_1);
+
+ /**
+ * update after the dispatcher has successfully done
+ * it's job.
+ */
+ if (!local->update_no_check && iver && !ret)
+ INODE_VERSION_UPDATE (priv, inode, iver, slice, type);
+ }
+
+ return;
+}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
new file mode 100644
index 000000000..b441f7068
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -0,0 +1,405 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_HELPERS_H
+#define _CHANGELOG_HELPERS_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+#include "iobuf.h"
+
+#include "changelog-misc.h"
+
+/**
+ * the changelog entry
+ */
+typedef struct changelog_log_data {
+ /* rollover related */
+ unsigned long cld_roll_time;
+
+ /* reopen changelog? */
+ gf_boolean_t cld_finale;
+
+ changelog_log_type cld_type;
+
+ /**
+ * sincd gfid is _always_ a necessity, it's not a part
+ * of the iobuf. by doing this we do not add any overhead
+ * for data and metadata related fops.
+ */
+ uuid_t cld_gfid;
+
+ /**
+ * iobufs are used for optionals records: pargfid, path,
+ * write offsets etc.. It's the fop implementers job
+ * to allocate (iobuf_get() in the fop) and get unref'ed
+ * in the callback (CHANGELOG_STACK_UNWIND).
+ */
+ struct iobuf *cld_iobuf;
+
+#define cld_ptr cld_iobuf->ptr
+
+ /**
+ * after allocation you can point this to the length of
+ * usable data, but make sure it does not exceed the
+ * the size of the requested iobuf.
+ */
+ size_t cld_iobuf_len;
+
+#define cld_ptr_len cld_iobuf_len
+
+ /**
+ * number of optional records
+ */
+ int cld_xtra_records;
+} changelog_log_data_t;
+
+/**
+ * holder for dispatch function and private data
+ */
+
+typedef struct changelog_priv changelog_priv_t;
+
+typedef struct changelog_dispatcher {
+ void *cd_data;
+ int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *,
+ changelog_log_data_t *, changelog_log_data_t *);
+} changelog_dispatcher_t;
+
+struct changelog_bootstrap {
+ changelog_mode_t mode;
+ int (*ctor) (xlator_t *, changelog_dispatcher_t *);
+ int (*dtor) (xlator_t *, changelog_dispatcher_t *);
+};
+
+struct changelog_encoder {
+ changelog_encoder_t encoder;
+ int (*encode) (xlator_t *, changelog_log_data_t *);
+};
+
+
+/* xlator private */
+
+typedef struct changelog_time_slice {
+ /**
+ * just in case we need nanosecond granularity some day.
+ * field is unused as of now (maybe we'd need it later).
+ */
+ struct timeval tv_start;
+
+ /**
+ * version of changelog file, incremented each time changes
+ * rollover.
+ */
+ unsigned long changelog_version[CHANGELOG_MAX_TYPE];
+} changelog_time_slice_t;
+
+typedef struct changelog_rollover {
+ /* rollover thread */
+ pthread_t rollover_th;
+
+ xlator_t *this;
+} changelog_rollover_t;
+
+typedef struct changelog_fsync {
+ /* fsync() thread */
+ pthread_t fsync_th;
+
+ xlator_t *this;
+} changelog_fsync_t;
+
+# define CHANGELOG_MAX_CLIENTS 5
+typedef struct changelog_notify {
+ /* reader end of the pipe */
+ int rfd;
+
+ /* notifier thread */
+ pthread_t notify_th;
+
+ /* unique socket path */
+ char sockpath[UNIX_PATH_MAX];
+
+ int socket_fd;
+
+ /**
+ * simple array of accept()'ed fds. Not scalable at all
+ * for large number of clients, but it's okay as we have
+ * a ahrd limit in this version (@CHANGELOG_MAX_CLIENTS).
+ */
+ int client_fd[CHANGELOG_MAX_CLIENTS];
+
+ xlator_t *this;
+} changelog_notify_t;
+
+struct changelog_priv {
+ gf_boolean_t active;
+
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+ /* logging directory */
+ char *changelog_dir;
+
+ /* one file for all changelog types */
+ int changelog_fd;
+
+ gf_lock_t lock;
+
+ /* writen end of the pipe */
+ int wfd;
+
+ /* rollover time */
+ int32_t rollover_time;
+
+ /* fsync() interval */
+ int32_t fsync_interval;
+
+ /* changelog type maps */
+ const char *maps[CHANGELOG_MAX_TYPE];
+
+ /* time slicer */
+ changelog_time_slice_t slice;
+
+ /* context of the updater */
+ changelog_dispatcher_t cd;
+
+ /* context of the rollover thread */
+ changelog_rollover_t cr;
+
+ /* context of fsync thread */
+ changelog_fsync_t cf;
+
+ /* context of the notifier thread */
+ changelog_notify_t cn;
+
+ /* operation mode */
+ changelog_mode_t op_mode;
+
+ /* bootstrap routine for 'current' logger */
+ struct changelog_bootstrap *cb;
+
+ /* encoder mode */
+ changelog_encoder_t encode_mode;
+
+ /* encoder */
+ struct changelog_encoder *ce;
+};
+
+struct changelog_local {
+ inode_t *inode;
+ gf_boolean_t update_no_check;
+
+ changelog_log_data_t cld;
+
+ /**
+ * ->prev_entry is used in cases when there needs to be
+ * additional changelog entry for the parent (eg. rename)
+ * It's analogous to ->next in single linked list world,
+ * but we call it as ->prev_entry... ha ha ha
+ */
+ struct changelog_local *prev_entry;
+};
+
+typedef struct changelog_local changelog_local_t;
+
+/* inode version is stored in inode ctx */
+typedef struct changelog_inode_ctx {
+ unsigned long iversion[CHANGELOG_MAX_TYPE];
+} changelog_inode_ctx_t;
+
+#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type])
+
+/**
+ * Optional Records:
+ * fops that need to save additional information request a array of
+ * @changelog_opt_t struct. The array is allocated via @iobufs.
+ */
+typedef enum {
+ CHANGELOG_OPT_REC_FOP,
+ CHANGELOG_OPT_REC_ENTRY,
+ CHANGELOG_OPT_REC_UINT32,
+} changelog_optional_rec_type_t;
+
+struct changelog_entry_fields {
+ uuid_t cef_uuid;
+ char *cef_bname;
+};
+
+typedef struct {
+ /**
+ * @co_covert can be used to do post-processing of the record before
+ * it's persisted to the CHANGELOG. If this is NULL, then the record
+ * is persisted as per it's in memory format.
+ */
+ size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode);
+
+ /* release routines */
+ void (*co_free) (void *data);
+
+ /* type of the field */
+ changelog_optional_rec_type_t co_type;
+
+ /**
+ * sizeof of the 'valid' field in the union. This field is not used if
+ * @co_convert is specified.
+ */
+ size_t co_len;
+
+ union {
+ unsigned int co_uint32;
+ glusterfs_fop_t co_fop;
+ struct changelog_entry_fields co_entry;
+ };
+} changelog_opt_t;
+
+#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t)
+
+/**
+ * helpers routines
+ */
+
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id);
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local);
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+ size_t len, int xr);
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local);
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid,
+ int xtra_records, gf_boolean_t update_flag);
+int
+changelog_start_next_change (xlator_t *this,
+ changelog_priv_t *priv,
+ unsigned long ts, gf_boolean_t finale);
+int
+changelog_open (xlator_t *this, changelog_priv_t *priv);
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last);
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld);
+inline size_t
+changelog_entry_length ();
+inline int
+changelog_write (int fd, char *buffer, size_t len);
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len);
+inline int
+changelog_handle_change (xlator_t *this,
+ changelog_priv_t *priv, changelog_log_data_t *cld);
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type);
+void *
+changelog_rollover (void *data);
+void *
+changelog_fsync_thread (void *data);
+int
+changelog_forget (xlator_t *this, inode_t *inode);
+
+/* macros */
+
+#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do { \
+ changelog_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __xl = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ changelog_local_cleanup (__xl, __local); \
+ if (__local && __local->prev_entry) \
+ changelog_local_cleanup (__xl, \
+ __local->prev_entry); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_REF(iobuf) do { \
+ if (iobuf) \
+ iobuf_ref (iobuf); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_UNREF(iobuf) do { \
+ if (iobuf) \
+ iobuf_unref (iobuf); \
+ } while (0)
+
+#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do { \
+ memcpy (buffer + off, val, len); \
+ off += len; \
+ } while (0)
+
+#define SLICE_VERSION_UPDATE(slice) do { \
+ int i = 0; \
+ for (; i < CHANGELOG_MAX_TYPE; i++) { \
+ slice->changelog_version[i]++; \
+ } \
+ } while (0)
+
+#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_UINT32; \
+ co->co_uint32 = number; \
+ xlen += sizeof (unsigned int); \
+ } while (0)
+
+#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_FOP; \
+ co->co_fop = fop; \
+ xlen += sizeof (fop); \
+ } while (0)
+
+#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \
+ converter, freefn, xlen, label) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ uuid_copy (co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \
+ } while (0)
+
+#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_false)
+
+#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_true)
+
+#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \
+ if (!priv->active) \
+ goto label; \
+ /* ignore rebalance process's activity. */ \
+ if (frame->root->pid == GF_CLIENT_PID_DEFRAG) \
+ goto label; \
+ } while (0)
+
+/* ignore internal fops */
+#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(dict, label) do { \
+ if (dict && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+#define CHANGELOG_COND_GOTO(priv, cond, label) do { \
+ if (!priv->active || cond) \
+ goto label; \
+ } while (0)
+
+#endif /* _CHANGELOG_HELPERS_H */
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
new file mode 100644
index 000000000..d72464eab
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MEM_TYPES_H
+#define _CHANGELOG_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_changelog_mem_types {
+ gf_changelog_mt_priv_t = gf_common_mt_end + 1,
+ gf_changelog_mt_str_t = gf_common_mt_end + 2,
+ gf_changelog_mt_batch_t = gf_common_mt_end + 3,
+ gf_changelog_mt_rt_t = gf_common_mt_end + 4,
+ gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5,
+ gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 6,
+ gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 7,
+ gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 8,
+ gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 9,
+ gf_changelog_mt_end
+};
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
new file mode 100644
index 000000000..127b03e2e
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MISC_H
+#define _CHANGELOG_MISC_H
+
+#include "glusterfs.h"
+#include "common-utils.h"
+
+#define CHANGELOG_MAX_TYPE 3
+#define CHANGELOG_FILE_NAME "CHANGELOG"
+
+#define CHANGELOG_VERSION_MAJOR 1
+#define CHANGELOG_VERSION_MINOR 1
+
+#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock"
+
+/**
+ * header starts with the version and the format of the changelog.
+ * 'version' not much of a use now.
+ */
+#define CHANGELOG_HEADER \
+ "GlusterFS Changelog | version: v%d.%d | encoding : %d\n"
+
+#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \
+ md5_wrapper((unsigned char *) brick_path, \
+ strlen(brick_path), \
+ md5_sum); \
+ (void) snprintf (sockpath, len, \
+ CHANGELOG_UNIX_SOCK, md5_sum); \
+ } while (0)
+
+/**
+ * ... used by libgfchangelog.
+ */
+#define CHANGELOG_GET_ENCODING(fd, buffer, len, enc, enc_len) do { \
+ FILE *fp; \
+ int fd_dup, maj, min; \
+ \
+ enc = -1; \
+ fd_dup = dup (fd); \
+ \
+ if (fd_dup != -1) { \
+ fp = fdopen (fd_dup, "r"); \
+ if (fp) { \
+ if (fgets (buffer, len, fp)) { \
+ elen = strlen (buffer); \
+ sscanf (buffer, \
+ CHANGELOG_HEADER, \
+ &maj, &min, &enc); \
+ } \
+ fclose (fp); \
+ } else { \
+ close (fd_dup); \
+ } \
+ } \
+ } while (0)
+
+/**
+ * everything after 'CHANGELOG_TYPE_ENTRY' are internal types
+ * (ie. none of the fops trigger this type of event), hence
+ * CHANGELOG_MAX_TYPE = 3
+ */
+typedef enum {
+ CHANGELOG_TYPE_DATA = 0,
+ CHANGELOG_TYPE_METADATA,
+ CHANGELOG_TYPE_ENTRY,
+ CHANGELOG_TYPE_ROLLOVER,
+ CHANGELOG_TYPE_FSYNC,
+} changelog_log_type;
+
+/* operation modes - RT for now */
+typedef enum {
+ CHANGELOG_MODE_RT = 0,
+} changelog_mode_t;
+
+/* encoder types */
+
+typedef enum {
+ CHANGELOG_ENCODE_MIN = 0,
+ CHANGELOG_ENCODE_BINARY,
+ CHANGELOG_ENCODE_ASCII,
+ CHANGELOG_ENCODE_MAX,
+} changelog_encoder_t;
+
+#define CHANGELOG_VALID_ENCODING(enc) \
+ (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX)
+
+#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY)
+#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER)
+#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC)
+
+#endif /* _CHANGELOG_MISC_H */
diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c
new file mode 100644
index 000000000..5f3d063a8
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.c
@@ -0,0 +1,314 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-notifier.h"
+
+#include <pthread.h>
+
+inline static void
+changelog_notify_clear_fd (changelog_notify_t *cn, int i)
+{
+ cn->client_fd[i] = -1;
+}
+
+inline static void
+changelog_notify_save_fd (changelog_notify_t *cn, int i, int fd)
+{
+ cn->client_fd[i] = fd;
+}
+
+static int
+changelog_notify_insert_fd (xlator_t *this, changelog_notify_t *cn, int fd)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ break;
+ }
+
+ if (i == CHANGELOG_MAX_CLIENTS) {
+ /**
+ * this case should not be hit as listen() would limit
+ * the number of completely established connections.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "hit max client limit (%d)", CHANGELOG_MAX_CLIENTS);
+ ret = -1;
+ }
+ else
+ changelog_notify_save_fd (cn, i, fd);
+
+ return ret;
+}
+
+static void
+changelog_notify_fill_rset (changelog_notify_t *cn, fd_set *rset, int *maxfd)
+{
+ int i = 0;
+
+ FD_ZERO (rset);
+
+ FD_SET (cn->socket_fd, rset);
+ *maxfd = cn->socket_fd;
+
+ FD_SET (cn->rfd, rset);
+ *maxfd = max (*maxfd, cn->rfd);
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] != -1) {
+ FD_SET (cn->client_fd[i], rset);
+ *maxfd = max (*maxfd, cn->client_fd[i]);
+ }
+ }
+
+ *maxfd = *maxfd + 1;
+}
+
+static int
+changelog_notify_client (changelog_notify_t *cn, char *path, ssize_t len)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ continue;
+
+ if (changelog_write (cn->client_fd[i],
+ path, len)) {
+ ret = -1;
+
+ close (cn->client_fd[i]);
+ changelog_notify_clear_fd (cn, i);
+ }
+ }
+
+ return ret;
+}
+
+static void
+changelog_notifier_init (changelog_notify_t *cn)
+{
+ int i = 0;
+
+ cn->socket_fd = -1;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ changelog_notify_clear_fd (cn, i);
+ }
+}
+
+static void
+changelog_close_client_conn (changelog_notify_t *cn)
+{
+ int i = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ continue;
+
+ close (cn->client_fd[i]);
+ changelog_notify_clear_fd (cn, i);
+ }
+}
+
+static void
+changelog_notifier_cleanup (void *arg)
+{
+ changelog_notify_t *cn = NULL;
+
+ cn = (changelog_notify_t *) arg;
+
+ changelog_close_client_conn (cn);
+
+ if (cn->socket_fd != -1)
+ close (cn->socket_fd);
+
+ if (cn->rfd)
+ close (cn->rfd);
+
+ if (unlink (cn->sockpath))
+ gf_log ("", GF_LOG_WARNING,
+ "could not unlink changelog socket file"
+ " %s (reason: %s", cn->sockpath, strerror (errno));
+}
+
+void *
+changelog_notifier (void *data)
+{
+ int i = 0;
+ int fd = 0;
+ int max_fd = 0;
+ int len = 0;
+ ssize_t readlen = 0;
+ xlator_t *this = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_notify_t *cn = NULL;
+ struct sockaddr_un local = {0,};
+ char path[PATH_MAX] = {0,};
+ char abspath[PATH_MAX] = {0,};
+
+ char buffer;
+ fd_set rset;
+
+ priv = (changelog_priv_t *) data;
+
+ cn = &priv->cn;
+ this = cn->this;
+
+ pthread_cleanup_push (changelog_notifier_cleanup, cn);
+
+ changelog_notifier_init (cn);
+
+ cn->socket_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (cn->socket_fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "changelog socket error (reason: %s)",
+ strerror (errno));
+ goto out;
+ }
+
+ CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick,
+ cn->sockpath, UNIX_PATH_MAX);
+ if (unlink (cn->sockpath) < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not unlink changelog socket file (%s)"
+ " (reason: %s)",
+ CHANGELOG_UNIX_SOCK, strerror (errno));
+ goto cleanup;
+ }
+ }
+
+ local.sun_family = AF_UNIX;
+ strcpy (local.sun_path, cn->sockpath);
+
+ len = strlen (local.sun_path) + sizeof (local.sun_family);
+
+ /* bind to the unix domain socket */
+ if (bind (cn->socket_fd, (struct sockaddr *) &local, len) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not bind to changelog socket (reason: %s)",
+ strerror (errno));
+ goto cleanup;
+ }
+
+ /* listen for incoming connections */
+ if (listen (cn->socket_fd, CHANGELOG_MAX_CLIENTS) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "listen() error on changelog socket (reason: %s)",
+ strerror (errno));
+ goto cleanup;
+ }
+
+ /**
+ * simple select() on all to-be-read file descriptors. This method
+ * though old school works pretty well when you have a handfull of
+ * fd's to be watched (clients).
+ *
+ * Future TODO: move this to epoll based notification facility if
+ * number of clients increase.
+ */
+ for (;;) {
+ changelog_notify_fill_rset (cn, &rset, &max_fd);
+
+ if (select (max_fd, &rset, NULL, NULL, NULL) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "select() returned -1 (reason: %s)",
+ strerror (errno));
+ sleep (2);
+ continue;
+ }
+
+ if (FD_ISSET (cn->socket_fd, &rset)) {
+ fd = accept (cn->socket_fd, NULL, NULL);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "accept error on changelog socket"
+ " (reason: %s)", strerror (errno));
+ } else if (changelog_notify_insert_fd (this, cn, fd)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "hit max client limit");
+ }
+ }
+
+ if (FD_ISSET (cn->rfd, &rset)) {
+ /**
+ * read changelog filename and notify all connected
+ * clients.
+ */
+ readlen = 0;
+ while (readlen < PATH_MAX) {
+ len = read (cn->rfd, &path[readlen++], 1);
+ if (len == -1) {
+ break;
+ }
+
+ if (len == 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rollover thread sent EOF"
+ " on pipe - possibly a crash.");
+ /* be blunt and close all connections */
+ pthread_exit(NULL);
+ }
+
+ if (path[readlen - 1] == '\0')
+ break;
+ }
+
+ /* should we close all client connections here too? */
+ if (len < 0 || readlen == PATH_MAX) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get pathname from rollover"
+ " thread or pathname too long");
+ goto process_rest;
+ }
+
+ (void) snprintf (abspath, PATH_MAX,
+ "%s/%s", priv->changelog_dir, path);
+ if (changelog_notify_client (cn, abspath,
+ strlen (abspath) + 1))
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not notify some clients with new"
+ " changelogs");
+ }
+
+ process_rest:
+ for (i = 0; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if ( (fd = cn->client_fd[i]) == -1 )
+ continue;
+
+ if (FD_ISSET (fd, &rset)) {
+ /**
+ * the only data we accept from the client is a
+ * disconnect. Anything else is treated as bogus
+ * and is silently discarded (also warned!!!).
+ */
+ if ( (readlen = read (fd, &buffer, 1)) <= 0 ) {
+ close (fd);
+ changelog_notify_clear_fd (cn, i);
+ } else {
+ /* silently discard data and log */
+ gf_log (this->name, GF_LOG_WARNING,
+ "misbehaving changelog client");
+ }
+ }
+ }
+
+ }
+
+ cleanup:;
+ pthread_cleanup_pop (1);
+
+ out:
+ return NULL;
+}
diff --git a/xlators/features/changelog/src/changelog-notifier.h b/xlators/features/changelog/src/changelog-notifier.h
new file mode 100644
index 000000000..55e728356
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_NOTIFIER_H
+#define _CHANGELOG_NOTIFIER_H
+
+#include "changelog-helpers.h"
+
+void *
+changelog_notifier (void *data);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
new file mode 100644
index 000000000..c147f68ca
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "changelog-rt.h"
+#include "changelog-mem-types.h"
+
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = GF_CALLOC (1, sizeof (*crt),
+ gf_changelog_mt_rt_t);
+ if (!crt)
+ return -1;
+
+ LOCK_INIT (&crt->lock);
+
+ cd->cd_data = crt;
+ cd->dispatchfn = &changelog_rt_enqueue;
+
+ return 0;
+}
+
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = cd->cd_data;
+
+ LOCK_DESTROY (&crt->lock);
+ GF_FREE (crt);
+
+ return 0;
+}
+
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1)
+{
+ int ret = 0;
+ changelog_rt_t *crt = NULL;
+
+ crt = (changelog_rt_t *) cbatch;
+
+ LOCK (&crt->lock);
+ {
+ ret = changelog_handle_change (this, priv, cld_0);
+ if (!ret && cld_1)
+ ret = changelog_handle_change (this, priv, cld_1);
+ }
+ UNLOCK (&crt->lock);
+
+ return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
new file mode 100644
index 000000000..1fc2bbc5b
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_RT_H
+#define _CHANGELOG_RT_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+
+#include "changelog-helpers.h"
+
+/* unused as of now - may be you would need it later */
+typedef struct changelog_rt {
+ gf_lock_t lock;
+} changelog_rt_t;
+
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1);
+
+#endif /* _CHANGELOG_RT_H */
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
new file mode 100644
index 000000000..5fe3b4362
--- /dev/null
+++ b/xlators/features/changelog/src/changelog.c
@@ -0,0 +1,1568 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-rt.h"
+
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+
+#include <pthread.h>
+
+#include "changelog-notifier.h"
+
+static struct changelog_bootstrap
+cb_bootstrap[] = {
+ {
+ .mode = CHANGELOG_MODE_RT,
+ .ctor = changelog_rt_init,
+ .dtor = changelog_rt_fini,
+ },
+};
+
+/* Entry operations - TYPE III */
+
+/**
+ * entry operations do not undergo inode version checking.
+ */
+
+/* {{{ */
+
+/* rmdir */
+
+int32_t
+changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_rmdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* unlink */
+
+int32_t
+changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* rename */
+
+int32_t
+changelog_rename_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+
+int32_t
+changelog_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, oldloc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 3);
+
+ wind:
+ STACK_WIND (frame, changelog_rename_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* link */
+
+int32_t
+changelog_link_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_link (call_frame_t *frame,
+ xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_link_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* mkdir */
+
+int32_t
+changelog_mkdir_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, S_IFDIR | mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+ wind:
+ STACK_WIND (frame, changelog_mkdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+/* symlink */
+
+int32_t
+changelog_symlink_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ size_t xtra_len = 0;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_symlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+}
+
+/* mknod */
+
+int32_t
+changelog_mknod_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mknod (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+ wind:
+ STACK_WIND (frame, changelog_mknod_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, dev, umask, xdata);
+ return 0;
+}
+
+/* creat */
+
+int32_t
+changelog_create_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (create, frame,
+ op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_opt_t *co = NULL;
+ changelog_priv_t *priv = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ /* init with two extra records */
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 5);
+
+ wind:
+ STACK_WIND (frame, changelog_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Metadata modification fops - TYPE II */
+
+/* {{{ */
+
+/* {f}setattr */
+
+int32_t
+changelog_fsetattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+
+
+}
+
+int32_t
+changelog_fsetattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 1);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+
+
+}
+
+int32_t
+changelog_setattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_setattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 1);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_setattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
+
+/* {f}removexattr */
+
+int32_t
+changelog_fremovexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_fremovexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+int32_t
+changelog_removexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_removexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+/* {f}setxattr */
+
+int32_t
+changelog_setxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_setxattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_setxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 1);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Data modification fops - TYPE I */
+
+/* {{{ */
+
+/* {f}truncate() */
+
+int32_t
+changelog_truncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (truncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_truncate (call_frame_t *frame,
+ xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_truncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (ftruncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_ftruncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+/* writev() */
+
+int32_t
+changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret <= 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (writev, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_writev (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+/* }}} */
+
+/**
+ * The
+ * - @init ()
+ * - @fini ()
+ * - @reconfigure ()
+ * ... and helper routines
+ */
+
+/**
+ * needed if there are more operation modes in the future.
+ */
+static void
+changelog_assign_opmode (changelog_priv_t *priv, char *mode)
+{
+ if ( strncmp (mode, "realtime", 8) == 0 ) {
+ priv->op_mode = CHANGELOG_MODE_RT;
+ }
+}
+
+static void
+changelog_assign_encoding (changelog_priv_t *priv, char *enc)
+{
+ if ( strncmp (enc, "binary", 6) == 0 ) {
+ priv->encode_mode = CHANGELOG_ENCODE_BINARY;
+ } else if ( strncmp (enc, "ascii", 5) == 0 ) {
+ priv->encode_mode = CHANGELOG_ENCODE_ASCII;
+ }
+}
+
+/* cleanup any helper threads that are running */
+static void
+changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+ if (priv->cr.rollover_th) {
+ changelog_thread_cleanup (this, priv->cr.rollover_th);
+ priv->cr.rollover_th = 0;
+ }
+
+ if (priv->cf.fsync_th) {
+ changelog_thread_cleanup (this, priv->cf.fsync_th);
+ priv->cf.fsync_th = 0;
+ }
+}
+
+/* spawn helper thread; cleaning up in case of errors */
+static int
+changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ priv->cr.this = this;
+ ret = gf_thread_create (&priv->cr.rollover_th,
+ NULL, changelog_rollover, priv);
+ if (ret)
+ goto out;
+
+ if (priv->fsync_interval) {
+ priv->cf.this = this;
+ ret = gf_thread_create (&priv->cf.fsync_th,
+ NULL, changelog_fsync_thread, priv);
+ }
+
+ if (ret)
+ changelog_cleanup_helper_threads (this, priv);
+
+ out:
+ return ret;
+}
+
+/* cleanup the notifier thread */
+static int
+changelog_cleanup_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ if (priv->cn.notify_th) {
+ changelog_thread_cleanup (this, priv->cn.notify_th);
+ priv->cn.notify_th = 0;
+
+ ret = close (priv->wfd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error closing writer end of notifier pipe"
+ " (reason: %s)", strerror (errno));
+ }
+
+ return ret;
+}
+
+/* spawn the notifier thread - nop if already running */
+static int
+changelog_spawn_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+ int flags = 0;
+ int pipe_fd[2] = {0, 0};
+
+ if (priv->cn.notify_th)
+ goto out; /* notifier thread already running */
+
+ ret = pipe (pipe_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot create pipe (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ /* writer is non-blocking */
+ flags = fcntl (pipe_fd[1], F_GETFL);
+ flags |= O_NONBLOCK;
+
+ ret = fcntl (pipe_fd[1], F_SETFL, flags);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set O_NONBLOCK flag");
+ goto out;
+ }
+
+ priv->wfd = pipe_fd[1];
+
+ priv->cn.this = this;
+ priv->cn.rfd = pipe_fd[0];
+
+ ret = gf_thread_create (&priv->cn.notify_th,
+ NULL, changelog_notifier, priv);
+
+ out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_changelog_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+changelog_init (xlator_t *this, changelog_priv_t *priv)
+{
+ int i = 0;
+ int ret = -1;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+
+ ret = gettimeofday (&tv, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gettimeofday() failure");
+ goto out;
+ }
+
+ priv->slice.tv_start = tv;
+
+ priv->maps[CHANGELOG_TYPE_DATA] = "D ";
+ priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
+ priv->maps[CHANGELOG_TYPE_ENTRY] = "E ";
+
+ for (; i < CHANGELOG_MAX_TYPE; i++) {
+ /* start with version 1 */
+ priv->slice.changelog_version[i] = 1;
+ }
+
+ if (!priv->active)
+ return ret;
+
+ /* spawn the notifier thread */
+ ret = changelog_spawn_notifier (this, priv);
+ if (ret)
+ goto out;
+
+ /**
+ * start with a fresh changelog file every time. this is done
+ * in case there was an encoding change. so... things are kept
+ * simple here.
+ */
+ ret = changelog_fill_rollover_data (&cld, _gf_false);
+ if (ret)
+ goto out;
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ }
+ UNLOCK (&priv->lock);
+
+ /* ... and finally spawn the helpers threads */
+ ret = changelog_spawn_helper_threads (this, priv);
+
+ out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+ gf_boolean_t active_earlier = _gf_true;
+ gf_boolean_t active_now = _gf_true;
+ changelog_time_slice_t *slice = NULL;
+ changelog_log_data_t cld = {0,};
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ ret = -1;
+ active_earlier = priv->active;
+
+ /* first stop the rollover and the fsync thread */
+ changelog_cleanup_helper_threads (this, priv);
+
+ GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-dir\" option is not set");
+ goto out;
+ }
+
+ GF_FREE (priv->changelog_dir);
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto out;
+
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+ if (ret)
+ goto out;
+
+ GF_OPTION_RECONF ("changelog", active_now, options, bool, out);
+
+ /**
+ * changelog_handle_change() handles changes that could possibly
+ * have been submit changes before changelog deactivation.
+ */
+ if (!active_now)
+ priv->active = _gf_false;
+
+ GF_OPTION_RECONF ("op-mode", tmp, options, str, out);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_RECONF ("encoding", tmp, options, str, out);
+ changelog_assign_encoding (priv, tmp);
+
+ GF_OPTION_RECONF ("rollover-time",
+ priv->rollover_time, options, int32, out);
+ GF_OPTION_RECONF ("fsync-interval",
+ priv->fsync_interval, options, int32, out);
+
+ if (active_now || active_earlier) {
+ ret = changelog_fill_rollover_data (&cld, !active_now);
+ if (ret)
+ goto out;
+
+ slice = &priv->slice;
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret && active_now)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+
+ if (ret)
+ goto out;
+
+ if (active_now) {
+ ret = changelog_spawn_notifier (this, priv);
+ if (!ret)
+ ret = changelog_spawn_helper_threads (this,
+ priv);
+ } else
+ ret = changelog_cleanup_notifier (this, priv);
+ }
+
+ out:
+ if (ret) {
+ ret = changelog_cleanup_notifier (this, priv);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "changelog reconfigured");
+ if (active_now)
+ priv->active = _gf_true;
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ int ret = -1;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("changelog", this, out);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator needs a single subvolume");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "dangling volume. please check volfile");
+ goto out;
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ this->local_pool = mem_pool_new (changelog_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local memory pool");
+ goto out;
+ }
+
+ LOCK_INIT (&priv->lock);
+
+ GF_OPTION_INIT ("changelog-brick", tmp, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-brick\" option is not set");
+ goto out;
+ }
+
+ priv->changelog_brick = gf_strdup (tmp);
+ if (!priv->changelog_brick)
+ goto out;
+ tmp = NULL;
+
+ GF_OPTION_INIT ("changelog-dir", tmp, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-dir\" option is not set");
+ goto out;
+ }
+
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto out;
+ tmp = NULL;
+
+ /**
+ * create the directory even if change-logging would be inactive
+ * so that consumers can _look_ into it (finding nothing...)
+ */
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT ("changelog", priv->active, bool, out);
+
+ GF_OPTION_INIT ("op-mode", tmp, str, out);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_INIT ("encoding", tmp, str, out);
+ changelog_assign_encoding (priv, tmp);
+
+ GF_OPTION_INIT ("rollover-time", priv->rollover_time, int32, out);
+
+ GF_OPTION_INIT ("fsync-interval", priv->fsync_interval, int32, out);
+
+ changelog_encode_change(priv);
+
+ GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode);
+ priv->cb = &cb_bootstrap[priv->op_mode];
+
+ /* ... now bootstrap the logger */
+ ret = priv->cb->ctor (this, &priv->cd);
+ if (ret)
+ goto out;
+
+ priv->changelog_fd = -1;
+ ret = changelog_init (this, priv);
+ if (ret)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "changelog translator loaded");
+
+ out:
+ if (ret) {
+ if (this->local_pool)
+ mem_pool_destroy (this->local_pool);
+ if (priv->cb) {
+ ret = priv->cb->dtor (this, &priv->cd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error in cleanup during init()");
+ }
+ GF_FREE (priv->changelog_brick);
+ GF_FREE (priv->changelog_dir);
+ GF_FREE (priv);
+ this->private = NULL;
+ } else
+ this->private = priv;
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ int ret = -1;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv) {
+ ret = priv->cb->dtor (this, &priv->cd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error in fini");
+ mem_pool_destroy (this->local_pool);
+ GF_FREE (priv->changelog_brick);
+ GF_FREE (priv->changelog_dir);
+ GF_FREE (priv);
+ }
+
+ this->private = NULL;
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .mknod = changelog_mknod,
+ .mkdir = changelog_mkdir,
+ .create = changelog_create,
+ .symlink = changelog_symlink,
+ .writev = changelog_writev,
+ .truncate = changelog_truncate,
+ .ftruncate = changelog_ftruncate,
+ .link = changelog_link,
+ .rename = changelog_rename,
+ .unlink = changelog_unlink,
+ .rmdir = changelog_rmdir,
+ .setattr = changelog_setattr,
+ .fsetattr = changelog_fsetattr,
+ .setxattr = changelog_setxattr,
+ .fsetxattr = changelog_fsetxattr,
+ .removexattr = changelog_removexattr,
+ .fremovexattr = changelog_fremovexattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = changelog_forget,
+};
+
+struct volume_options options[] = {
+ {.key = {"changelog"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable change-logging"
+ },
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+ " should be the export directory of the volume strictly."
+ },
+ {.key = {"changelog-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "directory for the changelog files"
+ },
+ {.key = {"op-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "realtime",
+ .value = {"realtime"},
+ .description = "operation mode - futuristic operation modes"
+ },
+ {.key = {"encoding"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "ascii",
+ .value = {"binary", "ascii"},
+ .description = "encoding type for changelogs"
+ },
+ {.key = {"rollover-time"},
+ .default_value = "60",
+ .type = GF_OPTION_TYPE_TIME,
+ .description = "time to switch to a new changelog file (in seconds)"
+ },
+ {.key = {"fsync-interval"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "0",
+ .description = "do not open CHANGELOG file with O_SYNC mode."
+ " instead perform fsync() at specified intervals"
+ },
+ {.key = {NULL}
+ },
+};
diff --git a/xlators/protocol/legacy/lib/Makefile.am b/xlators/features/compress/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/protocol/legacy/lib/Makefile.am
+++ b/xlators/features/compress/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am
new file mode 100644
index 000000000..0bf757c06
--- /dev/null
+++ b/xlators/features/compress/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = cdc.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = cdc.h cdc-mem-types.h
+
+cdc_la_LDFLAGS = -module -avoid-version $(LIBZ_LIBS)
+
+cdc_la_SOURCES = cdc.c cdc-helper.c
+cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+-shared -nostartfiles $(LIBZ_CFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
new file mode 100644
index 000000000..54432ff45
--- /dev/null
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -0,0 +1,547 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#ifdef HAVE_LIB_Z
+/* gzip header looks something like this
+ * (RFC 1950)
+ *
+ * +---+---+---+---+---+---+---+---+---+---+
+ * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ * +---+---+---+---+---+---+---+---+---+---+
+ *
+ * Data is usually sent without this header i.e
+ * Data sent = <compressed-data> + trailer(8)
+ * The trailer contains the checksum.
+ *
+ * gzip_header is added only during debugging.
+ * Refer to the function cdc_dump_iovec_to_disk
+ */
+static const char gzip_header[10] =
+ {
+ '\037', '\213', Z_DEFLATED, 0,
+ 0, 0, 0, 0,
+ 0, GF_CDC_OS_ID
+ };
+
+static int32_t
+cdc_next_iovec (xlator_t *this, cdc_info_t *ci)
+{
+ int ret = -1;
+
+ ci->ncount++;
+ /* check for iovec overflow -- should not happen */
+ if (ci->ncount == MAX_IOVEC) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Zlib output buffer overflow"
+ " ->ncount (%d) | ->MAX_IOVEC (%d)",
+ ci->ncount, MAX_IOVEC);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static void
+cdc_put_long (unsigned char *string, unsigned long x)
+{
+ string[0] = (unsigned char) (x & 0xff);
+ string[1] = (unsigned char) ((x & 0xff00) >> 8);
+ string[2] = (unsigned char) ((x & 0xff0000) >> 16);
+ string[3] = (unsigned char) ((x & 0xff000000) >> 24);
+}
+
+static unsigned long
+cdc_get_long (unsigned char *buf)
+{
+ return ((unsigned long) buf[0])
+ | (((unsigned long) buf[1]) << 8)
+ | (((unsigned long) buf[2]) << 16)
+ | (((unsigned long) buf[3]) << 24);
+}
+
+static int32_t
+cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ char *buf = NULL;
+
+ ret = cdc_next_iovec (this, ci);
+ if (ret)
+ goto out;
+
+ buf = CURR_VEC(ci).iov_base =
+ (char *) GF_CALLOC (1, GF_CDC_VALIDATION_SIZE,
+ gf_cdc_mt_gzip_trailer_t);
+
+ if (!CURR_VEC(ci).iov_base)
+ goto out;
+
+ CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE;
+
+ cdc_put_long ((unsigned char *)&buf[0], ci->crc);
+ cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in);
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static int32_t
+cdc_alloc_iobuf_and_init_vec (xlator_t *this,
+ cdc_priv_t *priv, cdc_info_t *ci,
+ int size)
+{
+ int ret = -1;
+ int alloc_len = 0;
+ struct iobuf *iobuf = NULL;
+
+ ret = cdc_next_iovec (this, ci);
+ if (ret)
+ goto out;
+
+ alloc_len = size ? size : ci->buffer_size;
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len);
+ if (!iobuf)
+ goto out;
+
+ ret = iobref_add (ci->iobref, iobuf);
+ if (ret)
+ goto out;
+
+ /* Initialize this iovec */
+ CURR_VEC(ci).iov_base = iobuf->ptr;
+ CURR_VEC(ci).iov_len = alloc_len;
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static void
+cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size)
+{
+ ci->stream.next_out = (unsigned char *) CURR_VEC(ci).iov_base;
+ ci->stream.avail_out = size ? size : ci->buffer_size;
+}
+
+/* This routine is for testing and debugging only.
+ * Data written = header(10) + <compressed-data> + trailer(8)
+ * So each gzip dump file is at least 18 bytes in size.
+ */
+void
+cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file)
+{
+ int i = 0;
+ int fd = 0;
+ size_t writen = 0;
+ size_t total_writen = 0;
+
+ fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 );
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot open file: %s", file);
+ return;
+ }
+
+ writen = write (fd, (char *) gzip_header, 10);
+ total_writen += writen;
+ for (i = 0; i < ci->ncount; i++) {
+ writen = write (fd, (char *) ci->vec[i].iov_base, ci->vec[i].iov_len);
+ total_writen += writen;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dump'd %zu bytes to %s", total_writen, GF_CDC_DEBUG_DUMP_FILE );
+
+ close (fd);
+}
+
+static int32_t
+cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci,
+ int (*libz_func)(z_streamp, int),
+ int flush)
+{
+ int32_t ret = Z_OK;
+ int done = 0;
+ unsigned int deflate_len = 0;
+
+ for (;;) {
+ deflate_len = ci->buffer_size - ci->stream.avail_out;
+
+ if (deflate_len != 0) {
+ CURR_VEC(ci).iov_len = deflate_len;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret) {
+ ret = Z_MEM_ERROR;
+ break;
+ }
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ if (done) {
+ ci->ncount--;
+ break;
+ }
+
+ ret = libz_func (&ci->stream, flush);
+
+ if (ret == Z_BUF_ERROR) {
+ ret = Z_OK;
+ ci->ncount--;
+ break;
+ }
+
+ done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END);
+
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ break;
+ }
+
+ return ret;
+}
+
+static int32_t
+do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv,
+ cdc_info_t *ci)
+{
+ int ret = -1;
+
+ /* Initialize defalte */
+ ret = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED,
+ priv->window_size, priv->mem_level,
+ Z_DEFAULT_STRATEGY);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to init Zlib (retval: %d)", ret);
+ goto out;
+ }
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *) vec->iov_base;
+ ci->stream.avail_in = vec->iov_len;
+
+ ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len);
+
+ gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d",
+ ci->crc, ci->stream.avail_in, ci->buffer_size);
+
+ /* compress !! */
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ ret = deflate (&ci->stream, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ break;
+ }
+
+ out:
+ return ret;
+}
+
+int32_t
+cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t **xdata)
+{
+ int ret = -1;
+ int i = 0;
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto out;
+
+ if (!*xdata) {
+ *xdata = dict_new ();
+ if (!*xdata) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata"
+ " dict");
+ goto out;
+ }
+ }
+
+ /* data */
+ for (i = 0; i < ci->count; i++) {
+ ret = do_cdc_compress (&ci->vector[i], this, priv, ci);
+ if (ret != Z_OK)
+ goto deflate_cleanup_out;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Compression Error: ret (%d)", ret);
+ ret = -1;
+ goto deflate_cleanup_out;
+ }
+
+ /* trailer */
+ ret = cdc_init_gzip_trailer (this, priv, ci);
+ if (ret)
+ goto deflate_cleanup_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Compressed %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE;
+
+ /* set deflated canary value for identification */
+ ret = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1);
+ if (ret) {
+ /* Send uncompressed data if we can't _tell_ the client
+ * that deflated data is on it's way. So, we just log
+ * the faliure and continue as usual.
+ */
+ gf_log (this->name, GF_LOG_ERROR,
+ "Data deflated, but could not set canary"
+ " value in dict for identification");
+ }
+
+ /* This is to be used in testing */
+ if ( priv->debug ) {
+ cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE );
+ }
+
+ deflate_cleanup_out:
+ (void) deflateEnd(&ci->stream);
+
+ out:
+ return ret;
+}
+
+
+/* deflate content is checked by the presence of a canary
+ * value in the dict as the key
+ */
+static int32_t
+cdc_check_content_for_deflate (dict_t *xdata)
+{
+ return dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0;
+}
+
+static unsigned long
+cdc_extract_crc (char *trailer)
+{
+ return cdc_get_long ((unsigned char *) &trailer[0]);
+}
+
+static unsigned long
+cdc_extract_size (char *trailer)
+{
+ return cdc_get_long ((unsigned char *) &trailer[4]);
+}
+
+static int32_t
+cdc_validate_inflate (cdc_info_t *ci, unsigned long crc,
+ unsigned long len)
+{
+ return !((crc == ci->crc)
+ /* inflated length is hidden inside
+ * Zlib stream struct */
+ && (len == ci->stream.total_out));
+}
+
+static int32_t
+do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ int i = 0;
+ int len = 0;
+ char *inflte = NULL;
+ char *trailer = NULL;
+ struct iovec vec = {0,};
+ unsigned long computed_crc = 0;
+ unsigned long computed_len = 0;
+
+ ret = inflateInit2 (&ci->stream, priv->window_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Zlib: Unable to initialize inflate");
+ goto out;
+ }
+
+ vec = THIS_VEC(ci, 0);
+
+ trailer = (char *) (((char *) vec.iov_base) + vec.iov_len
+ - GF_CDC_VALIDATION_SIZE);
+
+ /* CRC of uncompressed data */
+ computed_crc = cdc_extract_crc (trailer);
+
+ /* size of uncomrpessed data */
+ computed_len = cdc_extract_size (trailer);
+
+ gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d",
+ computed_crc, computed_len, ci->buffer_size);
+
+ inflte = vec.iov_base ;
+ len = vec.iov_len - GF_CDC_VALIDATION_SIZE;
+
+ /* allocate buffer of the original length of the data */
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *) inflte;
+ ci->stream.avail_in = len;
+
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ ret = inflate (&ci->stream, Z_NO_FLUSH);
+ if (ret == Z_STREAM_ERROR)
+ break;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Decompression Error: ret (%d)", ret);
+ ret = -1;
+ goto out;
+ }
+
+ /* compute CRC of the uncompresses data to check for
+ * correctness */
+
+ for (i = 0; i < ci->ncount; i++) {
+ ci->crc = crc32 (ci->crc,
+ (const Bytef *) ci->vec[i].iov_base,
+ ci->vec[i].iov_len);
+ }
+
+ /* validate inflated data */
+ ret = cdc_validate_inflate (ci, computed_crc, computed_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Checksum or length mismatched in inflated data");
+ }
+
+ out:
+ return ret;
+}
+
+int32_t
+cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+
+ /* check for deflate content */
+ if (!cdc_check_content_for_deflate (xdata)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Content not deflated, passing through ...");
+ goto passthrough_out;
+ }
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto passthrough_out;
+
+ /* do we need to do this? can we assume that one iovec
+ * will hold per request data everytime?
+ *
+ * server/client protocol seems to deal with a single
+ * iovec even if op_ret > 1M. So, it looks ok to
+ * assume that a single iovec will contain all the
+ * data (This saves us a lot from finding the trailer
+ * and the data since it could have been split-up onto
+ * two adjacent iovec's.
+ *
+ * But, in case this translator is loaded above quick-read
+ * for some reason, then it's entirely possible that we get
+ * multiple iovec's...
+ *
+ * This case (handled below) is not tested. (by loading the
+ * xlator below quick-read)
+ */
+
+ /* @@ I_HOPE_THIS_IS_NEVER_HIT */
+ if (ci->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING, "unable to handle"
+ " multiple iovecs (%d in number)", ci->count);
+ goto inflate_cleanup_out;
+ /* TODO: coallate all iovecs in one */
+ }
+
+ ret = do_cdc_decompress (this, priv, ci);
+ if (ret)
+ goto inflate_cleanup_out;
+
+ ci->nbytes = ci->stream.total_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Inflated %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ inflate_cleanup_out:
+ (void) inflateEnd (&ci->stream);
+
+ passthrough_out:
+ return ret;
+}
+
+#endif
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
new file mode 100644
index 000000000..ead2c70ba
--- /dev/null
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_MEM_TYPES_H
+#define __CDC_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_cdc_mem_types {
+ gf_cdc_mt_priv_t = gf_common_mt_end + 1,
+ gf_cdc_mt_vec_t = gf_common_mt_end + 2,
+ gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3,
+ gf_cdc_mt_end = gf_common_mt_end + 4,
+};
+
+#endif
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
new file mode 100644
index 000000000..67fc52505
--- /dev/null
+++ b/xlators/features/compress/src/cdc.c
@@ -0,0 +1,361 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+static void
+cdc_cleanup_iobref (cdc_info_t *ci)
+{
+ assert(ci->iobref != NULL);
+ iobref_clear (ci->iobref);
+}
+
+int32_t
+cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {0,};
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+ priv = this->private;
+
+ if (op_ret <= 0)
+ goto default_out;
+
+ if ( (priv->min_file_size != 0)
+ && (op_ret < priv->min_file_size) )
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = op_ret;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A readv compresses on the server side and decompresses on the client side
+ */
+ if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_compress (this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_decompress (this, priv, &ci, xdata);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid operation mode (%d)", priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno,
+ ci.vec, ci.ncount, stbuf, iobref,
+ xdata);
+ cdc_cleanup_iobref (&ci);
+ return 0;
+
+ default_out:
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
+}
+
+int32_t
+cdc_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata)
+{
+ fop_readv_cbk_t cbk = NULL;
+
+#ifdef HAVE_LIB_Z
+ cbk = cdc_readv_cbk;
+#else
+ cbk = default_readv_cbk;
+#endif
+ STACK_WIND (frame, cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {0,};
+ size_t isize = 0;
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+ priv = this->private;
+
+ isize = iov_length(vector, count);
+
+ if (isize <= 0)
+ goto default_out;
+
+ if ( (priv->min_file_size != 0) && (isize < priv->min_file_size) )
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = isize;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A writev compresses on the client side and decompresses on the server side
+ */
+ if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_compress (this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_decompress (this, priv, &ci, xdata);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_WIND (frame,
+ cdc_writev_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev,
+ fd, ci.vec, ci.ncount, offset, flags,
+ iobref, xdata);
+
+ cdc_cleanup_iobref (&ci);
+ return 0;
+
+ default_out:
+ STACK_WIND (frame,
+ cdc_writev_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_cdc_mt_end);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ int ret = -1;
+ char *temp_str = NULL;
+ cdc_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Need subvolume == 1");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Dangling volume. Check volfile");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t);
+ if (!priv) {
+ goto err;
+ }
+
+ /* Check if debug mode is turned on */
+ GF_OPTION_INIT ("debug", priv->debug, bool, err);
+ if( priv->debug ) {
+ gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on");
+ }
+
+ /* Set Gzip Window Size */
+ GF_OPTION_INIT ("window-size", priv->window_size, int32, err);
+ if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE)
+ || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip window size (%d), using default",
+ priv->window_size);
+ priv->window_size = GF_CDC_DEF_WINDOWSIZE;
+ }
+
+ /* Set Gzip (De)Compression Level */
+ GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err);
+ if ( ((priv->cdc_level < 1) || (priv->cdc_level > 9))
+ && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip (de)compression level (%d),"
+ " using default", priv->cdc_level);
+ priv->cdc_level = GF_CDC_DEF_COMPRESSION;
+ }
+
+ /* Set Gzip Memory Level */
+ GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err);
+ if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip memory level, using the default");
+ priv->mem_level = GF_CDC_DEF_MEMLEVEL;
+ }
+
+ /* Set min file size to enable compression */
+ GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err);
+
+ /* Mode of operation - Server/Client */
+ ret = dict_get_str (this->options, "mode", &temp_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Operation mode not specified !!");
+ goto err;
+ }
+
+ if (GF_CDC_MODE_IS_CLIENT (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_CLIENT;
+ } else if (GF_CDC_MODE_IS_SERVER (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_SERVER;
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Bogus operation mode (%s) specified", temp_str);
+ goto err;
+ }
+
+ this->private = priv;
+ gf_log (this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str);
+ return 0;
+
+ err:
+ if (priv)
+ GF_FREE (priv);
+
+ return -1;
+}
+
+void
+fini (xlator_t *this)
+{
+ cdc_priv_t *priv = this->private;
+
+ if (priv)
+ GF_FREE (priv);
+ this->private = NULL;
+ return;
+}
+
+struct xlator_fops fops = {
+ .readv = cdc_readv,
+ .writev = cdc_writev,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {"window-size"},
+ .default_value = "-15",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Size of the zlib history buffer."
+ },
+ { .key = {"mem-level"},
+ .default_value = "8",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Memory allocated for internal compression state. "
+ "1 uses minimum memory but is slow and reduces "
+ "compression ratio; memLevel=9 uses maximum memory "
+ "for optimal speed. The default value is 8."
+ },
+ { .key = {"compression-level"},
+ .default_value = "-1",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Compression levels \n"
+ "0 : no compression, 1 : best speed, \n"
+ "9 : best compression, -1 : default compression "
+ },
+ { .key = {"min-size"},
+ .default_value = "0",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Data is compressed only when its size exceeds this."
+ },
+ { .key = {"mode"},
+ .value = {"server", "client"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Set on the basis of where the xlator is loaded. "
+ "This option should NOT be configured by user."
+ },
+ { .key = {"debug"},
+ .default_value = "false",
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "This is used in testing. Will dump compressed data "
+ "to disk as a gzip file."
+ },
+ { .key = {NULL}
+ },
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
new file mode 100644
index 000000000..71f4d2317
--- /dev/null
+++ b/xlators/features/compress/src/cdc.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_H
+#define __CDC_H
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#include "xlator.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+typedef struct cdc_priv {
+ int window_size;
+ int mem_level;
+ int cdc_level;
+ int min_file_size;
+ int op_mode;
+ gf_boolean_t debug;
+ gf_lock_t lock;
+} cdc_priv_t;
+
+typedef struct cdc_info {
+ /* input bits */
+ int count;
+ int32_t ibytes;
+ struct iovec *vector;
+ struct iatt *buf;
+
+ /* output bits */
+ int ncount;
+ int nbytes;
+ int buffer_size;
+ struct iovec vec[MAX_IOVEC];
+ struct iobref *iobref;
+
+ /* zlib bits */
+#ifdef HAVE_LIB_Z
+ z_stream stream;
+#endif
+ unsigned long crc;
+} cdc_info_t;
+
+#define NVEC(ci) (ci->ncount - 1)
+#define CURR_VEC(ci) ci->vec[ci->ncount - 1]
+#define THIS_VEC(ci, i) ci->vector[i]
+
+/* Gzip defaults */
+#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */
+#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */
+
+#ifdef HAVE_LIB_Z
+#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION
+#else
+#define GF_CDC_DEF_COMPRESSION -1
+#endif
+
+#define GF_CDC_DEF_MEMLEVEL 8
+#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size
+
+/* Operation mode
+ * If xlator is loaded on client, readv decompresses and writev compresses
+ * If xlator is loaded on server, readv compresses and writev decompresses
+ */
+#define GF_CDC_MODE_CLIENT 0
+#define GF_CDC_MODE_SERVER 1
+
+/* min size of data to do cmpression
+ * 0 == compress even 1byte
+ */
+#define GF_CDC_MIN_CHUNK_SIZE 0
+
+#define GF_CDC_VALIDATION_SIZE 8
+
+#define GF_CDC_OS_ID 0xFF
+#define GF_CDC_DEFLATE_CANARY_VAL "deflate"
+#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz"
+
+#define GF_CDC_MODE_IS_CLIENT(m) \
+ (strcmp (m, "client") == 0)
+
+#define GF_CDC_MODE_IS_SERVER(m) \
+ (strcmp (m, "server") == 0)
+
+int32_t
+cdc_compress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t **xdata);
+int32_t
+cdc_decompress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t *xdata);
+
+#endif
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
index d473b9ea1..d1fda8b0a 100644
--- a/xlators/features/filter/src/Makefile.am
+++ b/xlators/features/filter/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = filter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-filter_la_LDFLAGS = -module -avoidversion
+filter_la_LDFLAGS = -module -avoid-version
filter_la_SOURCES = filter.c
filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = filter-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h
index de2cb9665..47a17249b 100644
--- a/xlators/features/filter/src/filter-mem-types.h
+++ b/xlators/features/filter/src/filter-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __FILTER_MEM_TYPES_H__
#define __FILTER_MEM_TYPES_H__
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
index eda042f35..1d4887b71 100644
--- a/xlators/features/filter/src/filter.c
+++ b/xlators/features/filter/src/filter.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/bindings/python/Makefile.am b/xlators/features/gfid-access/Makefile.am
index af437a64d..af437a64d 100644
--- a/xlators/bindings/python/Makefile.am
+++ b/xlators/features/gfid-access/Makefile.am
diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am
new file mode 100644
index 000000000..db53affaa
--- /dev/null
+++ b/xlators/features/gfid-access/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = gfid-access.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+gfid_access_la_LDFLAGS = -module -avoid-version
+
+gfid_access_la_SOURCES = gfid-access.c
+gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = gfid-access.h gfid-access-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
new file mode 100644
index 000000000..168d67b43
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GFID_ACCESS_MEM_TYPES_H
+#define _GFID_ACCESS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_changelog_mem_types {
+ gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
+ gf_gfid_access_mt_gfid_t,
+ gf_gfid_access_mt_end
+};
+
+#endif
+
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
new file mode 100644
index 000000000..8e614397c
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -0,0 +1,1287 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "gfid-access.h"
+#include "inode.h"
+#include "byte-order.h"
+
+
+
+void
+ga_newfile_args_free (ga_newfile_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE (args->bname);
+
+ if (S_ISLNK (args->st_mode) && args->args.symlink.linkpath) {
+ GF_FREE (args->args.symlink.linkpath);
+ args->args.symlink.linkpath = NULL;
+ }
+
+ mem_put (args);
+out:
+ return;
+}
+
+
+void
+ga_heal_args_free (ga_heal_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE (args->bname);
+
+ mem_put (args);
+out:
+ return;
+}
+
+
+ga_newfile_args_t *
+ga_newfile_parse_args (xlator_t *this, data_t *data)
+{
+ ga_newfile_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ int len = 0;
+ int blob_len = 0;
+ int min_len = 0;
+ void *blob = NULL;
+
+ priv = this->private;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid)
+ + sizeof (args->st_mode) + 2 + 2;
+ if (blob_len < min_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid length: Total length is less "
+ "than minimum length.");
+ goto err;
+ }
+
+ args = mem_get0 (priv->newfile_args_pool);
+ if (args == NULL)
+ goto err;
+
+ args->uid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ args->gid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ memcpy (args->gfid, blob, sizeof (args->gfid));
+ blob += sizeof (args->gfid);
+ blob_len -= sizeof (args->gfid);
+
+ args->st_mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ len = strnlen (blob, blob_len);
+ if (len == blob_len)
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. No null byte present.",
+ args->gfid);
+ goto err;
+ }
+
+ args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char);
+ if (args->bname == NULL)
+ goto err;
+
+ memcpy (args->bname, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+
+ if (S_ISDIR (args->st_mode)) {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.umask = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+ if (blob_len < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ } else if (S_ISLNK (args->st_mode)) {
+ len = strnlen (blob, blob_len);
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.symlink.linkpath = GF_CALLOC (1, len + 1,
+ gf_common_mt_char);
+ if (args->args.symlink.linkpath == NULL)
+ goto err;
+
+ memcpy (args->args.symlink.linkpath, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+ } else {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.rdev = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.umask = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+ }
+
+ if (blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+
+ return args;
+
+err:
+ if (args)
+ ga_newfile_args_free (args);
+
+ return NULL;
+}
+
+ga_heal_args_t *
+ga_heal_parse_args (xlator_t *this, data_t *data)
+{
+ ga_heal_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ void *blob = NULL;
+ int len = 0;
+ int blob_len = 0;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ priv = this->private;
+
+ /* bname should at least contain a character */
+ if (blob_len < (sizeof (args->gfid) + 2))
+ goto err;
+
+ args = mem_get0 (priv->heal_args_pool);
+ if (!args)
+ goto err;
+
+ memcpy (args->gfid, blob, sizeof (args->gfid));
+ blob += sizeof (args->gfid);
+ blob_len -= sizeof (args->gfid);
+
+ len = strnlen (blob, blob_len);
+ if (len == blob_len)
+ goto err;
+
+ args->bname = GF_CALLOC (1, len + 1, gf_common_mt_char);
+ if (!args->bname)
+ goto err;
+
+ memcpy (args->bname, blob, len);
+ blob_len -= (len + 1);
+
+ if (blob_len)
+ goto err;
+
+ return args;
+
+err:
+ if (args)
+ ga_heal_args_free (args);
+
+ return NULL;
+}
+
+static int32_t
+ga_fill_tmp_loc (loc_t *loc, xlator_t *this, uuid_t gfid,
+ char *bname, dict_t *xdata, loc_t *new_loc)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *parent = NULL;
+
+ parent = loc->inode;
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (!ret) {
+ parent = (void *)value;
+ if (uuid_is_null (parent->gfid))
+ parent = loc->inode;
+ }
+
+ /* parent itself should be looked up */
+ uuid_copy (new_loc->pargfid, parent->gfid);
+ new_loc->parent = inode_ref (parent);
+
+ new_loc->inode = inode_grep (parent->table, parent, bname);
+ if (!new_loc->inode)
+ new_loc->inode = inode_new (parent->table);
+
+ loc_path (new_loc, bname);
+ new_loc->name = basename (new_loc->path);
+
+ /* As GFID would not be set on the entry yet, lets not send entry
+ gfid in the request */
+ /*uuid_copy (new_loc->gfid, (const unsigned char *)gfid); */
+
+ ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+
+static gf_boolean_t
+__is_gfid_access_dir (uuid_t gfid)
+{
+ uuid_t aux_gfid;
+
+ memset (aux_gfid, 0, 16);
+ aux_gfid[15] = GF_AUX_GFID;
+
+ if (uuid_compare (gfid, aux_gfid) == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int32_t
+ga_forget (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *tmp_inode = NULL;
+
+ ret = inode_ctx_del (inode, this, &value);
+ if (ret)
+ goto out;
+
+ tmp_inode = (void *)value;
+ inode_unref (tmp_inode);
+
+out:
+ return 0;
+}
+
+
+static int
+ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
+{
+ call_frame_t *orig_frame = NULL;
+
+ orig_frame = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, dict);
+
+ return 0;
+}
+
+static int32_t
+ga_newentry_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost,
+ dict_t *xdata)
+{
+ ga_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret,
+ op_errno, xdata);
+
+ loc_wipe (&local->loc);
+ mem_put (local);
+
+ return 0;
+}
+
+static int
+ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ ga_local_t *local = NULL;
+ struct iatt temp_stat = {0,};
+
+ local = frame->local;
+
+ if (!local->uid && !local->gid)
+ goto done;
+
+ temp_stat.ia_uid = local->uid;
+ temp_stat.ia_gid = local->gid;
+
+ STACK_WIND (frame, ga_newentry_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, &local->loc, &temp_stat,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID), xdata);
+
+ return 0;
+
+done:
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret,
+ op_errno, xdata);
+
+ loc_wipe (&local->loc);
+ mem_put (local);
+
+ return 0;
+}
+
+int32_t
+ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_newfile_args_t *args = NULL;
+ loc_t tmp_loc = {0,};
+ call_frame_t *new_frame = NULL;
+ mode_t mode = 0;
+ ga_local_t *local = NULL;
+ uuid_t gfid = {0,};
+
+ args = ga_newfile_parse_args (this, data);
+ if (!args)
+ goto out;
+
+ ret = uuid_parse (args->gfid, gfid);
+ if (ret)
+ goto out;
+
+ if (!xdata)
+ xdata = dict_new ();
+
+ ret = ga_fill_tmp_loc (loc, this, gfid,
+ args->bname, xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
+
+ local = mem_get0 (this->local_pool);
+ local->orig_frame = frame;
+
+ local->uid = args->uid;
+ local->gid = args->gid;
+
+ loc_copy (&local->loc, &tmp_loc);
+
+ new_frame->local = local;
+
+ if (S_ISDIR (args->st_mode)) {
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, args->args.mkdir.mode,
+ args->args.mkdir.umask, xdata);
+ } else if (S_ISLNK (args->st_mode)) {
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ args->args.symlink.linkpath,
+ &tmp_loc, 0, xdata);
+ } else {
+ /* use 07777 (4 7s) for considering the Sticky bits etc) */
+ mode = (S_IFMT & args->st_mode) |
+ (07777 & args->args.mknod.mode);;
+
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ &tmp_loc, mode,
+ args->args.mknod.rdev, args->args.mknod.umask,
+ xdata);
+ }
+
+ ret = 0;
+out:
+ ga_newfile_args_free (args);
+
+ return ret;
+}
+
+int32_t
+ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_heal_args_t *args = NULL;
+ loc_t tmp_loc = {0,};
+ call_frame_t *new_frame = NULL;
+ uuid_t gfid = {0,};
+
+ args = ga_heal_parse_args (this, data);
+ if (!args)
+ goto out;
+
+ ret = uuid_parse (args->gfid, gfid);
+ if (ret)
+ goto out;
+
+ if (!xdata)
+ xdata = dict_new ();
+
+ ret = ga_fill_tmp_loc (loc, this, gfid, args->bname,
+ xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
+ new_frame->local = (void *)frame;
+
+ STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->lookup,
+ &tmp_loc, xdata);
+
+ ret = 0;
+out:
+ if (args)
+ ga_heal_args_free (args);
+
+ return ret;
+}
+
+int32_t
+ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+
+ data_t *data = NULL;
+ int op_errno = ENOMEM;
+ int ret = 0;
+ inode_t *unref = NULL;
+
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent &&
+ __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ op_errno = EPERM;
+ goto err;
+ }
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE);
+ if (data) {
+ ret = ga_new_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_HEAL);
+ if (data) {
+ ret = ga_heal_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ //If the inode is a virtual inode change the inode otherwise perform
+ //the operation on same inode
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
+}
+
+
+int32_t
+ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ int j = 0;
+ int i = 0;
+ int ret = 0;
+ uint64_t temp_ino = 0;
+ inode_t *cbk_inode = NULL;
+ inode_t *true_inode = NULL;
+ uuid_t random_gfid = {0,};
+
+ if (frame->local)
+ cbk_inode = frame->local;
+ else
+ cbk_inode = inode;
+
+ frame->local = NULL;
+ if (op_ret)
+ goto unwind;
+
+ if (!IA_ISDIR (buf->ia_type))
+ goto unwind;
+
+ /* need to send back a different inode for linking in itable */
+ if (cbk_inode == inode) {
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (inode->table, buf->ia_gfid);
+ if (!true_inode) {
+ cbk_inode = inode_new (inode->table);
+
+ if (!cbk_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ /* the inode is not present in itable, ie, the actual
+ path is not yet looked up. Use the current inode
+ itself for now */
+
+ inode_link (inode, NULL, NULL, buf);
+ } else {
+ /* 'inode_ref()' has been done in inode_find() */
+ inode = true_inode;
+ }
+
+ ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the inode ctx with"
+ "the actual inode");
+ if (inode)
+ inode_unref (inode);
+ }
+ inode = NULL;
+ }
+
+ if (!uuid_is_null (cbk_inode->gfid)) {
+ /* if the previous linked inode is used, use the
+ same gfid */
+ uuid_copy (random_gfid, cbk_inode->gfid);
+ } else {
+ /* replace the buf->ia_gfid to a random gfid
+ for directory, for files, what we received is fine */
+ uuid_generate (random_gfid);
+ }
+
+ uuid_copy (buf->ia_gfid, random_gfid);
+
+ for (i = 15; i > (15 - 8); i--) {
+ temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
+ j += 8;
+ }
+ buf->ia_ino = temp_ino;
+
+unwind:
+ /* Lookup on non-existing gfid returns ESTALE.
+ Convert into ENOENT for virtual lookup*/
+ if (op_errno == ESTALE)
+ op_errno = ENOENT;
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf,
+ xdata, postparent);
+
+ return 0;
+}
+
+int32_t
+ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ ga_private_t *priv = NULL;
+
+ /* if the entry in question is not 'root',
+ then follow the normal path */
+ if (op_ret || !__is_root_gfid(buf->ia_gfid))
+ goto unwind;
+
+ priv = this->private;
+
+ /* do we need to copy root stbuf everytime? */
+ /* mostly yes, as we want to have the 'stat' info show latest
+ in every _cbk() */
+
+ /* keep the reference for root stat buf */
+ priv->root_stbuf = *buf;
+ priv->gfiddir_stbuf = priv->root_stbuf;
+ priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID;
+ priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int32_t
+ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+ uuid_t tmp_gfid = {0,};
+ loc_t tmp_loc = {0,};
+ uint64_t value = 0;
+ inode_t *inode = NULL;
+ inode_t *true_inode = NULL;
+ int32_t op_errno = ENOENT;
+
+ /* if its discover(), no need for any action here */
+ if (!loc->name)
+ goto wind;
+
+ /* if its revalidate, and inode is not of type directory,
+ proceed with 'wind' */
+ if (loc->inode && loc->inode->ia_type &&
+ !IA_ISDIR (loc->inode->ia_type)) {
+
+ /* a revalidate on ".gfid/<dentry>" is possible, check for it */
+ if (((loc->parent &&
+ __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid))) {
+
+ /* here, just send 'loc->gfid' and 'loc->inode' */
+ tmp_loc.inode = inode_ref (loc->inode);
+ uuid_copy (tmp_loc.gfid, loc->inode->gfid);
+
+ STACK_WIND (frame, default_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ &tmp_loc, xdata);
+
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+ }
+
+ /* not something to bother, continue the flow */
+ goto wind;
+ }
+
+ priv = this->private;
+
+ /* need to check if the lookup is on virtual dir */
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent && __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ /* this means, the query is on '/.gfid', return the fake stat,
+ and say success */
+
+ STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+ }
+
+ /* now, check if the lookup() is on an existing entry,
+ but on gfid-path */
+ if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid))) {
+ if (!loc->parent)
+ goto wind;
+
+ ret = inode_ctx_get (loc->parent, this, &value);
+ if (ret)
+ goto wind;
+
+ inode = (inode_t *) value;
+
+ ret = loc_copy_overload_parent (&tmp_loc, loc, inode);
+ if (ret)
+ goto err;
+
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, &tmp_loc, xdata);
+
+ loc_wipe (&tmp_loc);
+ return 0;
+ }
+
+ /* make sure the 'basename' is actually a 'canonical-gfid',
+ otherwise, return error */
+ ret = uuid_parse (loc->name, tmp_gfid);
+ if (ret)
+ goto err;
+
+ /* if its fresh lookup, go ahead and send it down, if not,
+ for directory, we need indirection to actual dir inode */
+ if (!(loc->inode && loc->inode->ia_type))
+ goto discover;
+
+ /* revalidate on directory */
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (ret)
+ goto err;
+
+ inode = (void *)value;
+
+ /* valid inode, already looked up, work on that */
+ if (inode->ia_type)
+ goto discover;
+
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (loc->inode->table, tmp_gfid);
+ if (true_inode) {
+ /* time do another lookup and update the context
+ with proper inode */
+ op_errno = ESTALE;
+ goto err;
+ }
+
+discover:
+ /* for the virtual entries, we don't need to send 'gfid-req' key, as
+ for these entries, we don't want to 'set' a new gfid */
+ if (xdata)
+ dict_del (xdata, "gfid-req");
+
+ uuid_copy (tmp_loc.gfid, tmp_gfid);
+
+ /* if revalidate, then we need to have the proper reference */
+ if (inode) {
+ tmp_loc.inode = inode_ref (inode);
+ frame->local = loc->inode;
+ } else {
+ tmp_loc.inode = inode_ref (loc->inode);
+ }
+
+ STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+
+wind:
+ /* used for all the normal lookup path */
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+}
+
+int
+ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, loc->inode,
+ NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int
+ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+
+}
+
+int
+ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+ umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ loc, flag, xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *oldloc_unref = NULL;
+ inode_t *newloc_unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref,
+ handle_newloc);
+
+handle_newloc:
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind);
+
+wind:
+ STACK_WIND (frame, default_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+
+ if (oldloc_unref)
+ inode_unref (oldloc_unref);
+
+ if (newloc_unref)
+ inode_unref (newloc_unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+
+int
+ga_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *oldloc_unref = NULL;
+ inode_t *newloc_unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref,
+ handle_newloc);
+
+handle_newloc:
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind);
+
+wind:
+ STACK_WIND (frame, default_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+
+ if (oldloc_unref)
+ inode_unref (oldloc_unref);
+
+ if (newloc_unref)
+ inode_unref (newloc_unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ /* also check if the loc->inode itself is virtual
+ inode, if yes, return with failure, mainly because we
+ can't handle all the readdirp and other things on it. */
+ if (inode_ctx_get (loc->inode, this, NULL) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ STACK_WIND (frame, default_opendir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_gfid_access_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ goto out;
+ }
+
+ /* This can be the top of graph in certain cases */
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dangling volume. check volfile ");
+ }
+
+ /* TODO: define a mem-type structure */
+ priv = GF_CALLOC (1, sizeof (*priv), gf_gfid_access_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ priv->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512);
+ if (!priv->newfile_args_pool)
+ goto out;
+
+ priv->heal_args_pool = mem_pool_new (ga_heal_args_t, 512);
+ if (!priv->heal_args_pool)
+ goto out;
+
+ this->local_pool = mem_pool_new (ga_local_t, 16);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = priv;
+
+ ret = 0;
+out:
+ if (ret && priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy (priv->newfile_args_pool);
+ GF_FREE (priv);
+ }
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ priv = this->private;
+ this->private = NULL;
+
+ if (priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy (priv->newfile_args_pool);
+ if (priv->heal_args_pool)
+ mem_pool_destroy (priv->heal_args_pool);
+ GF_FREE (priv);
+ }
+
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = ga_lookup,
+
+ /* entry fops */
+ .mkdir = ga_mkdir,
+ .mknod = ga_mknod,
+ .create = ga_create,
+ .symlink = ga_symlink,
+ .link = ga_link,
+ .unlink = ga_unlink,
+ .rmdir = ga_rmdir,
+ .rename = ga_rename,
+
+ /* handle any other directory operations here */
+ .opendir = ga_opendir,
+ .stat = ga_stat,
+ .setattr = ga_setattr,
+ .getxattr = ga_getxattr,
+ .removexattr = ga_removexattr,
+
+ /* special fop to handle more entry creations */
+ .setxattr = ga_setxattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = ga_forget,
+};
+
+struct volume_options options[] = {
+ /* This translator doesn't take any options, or provide any options */
+ { .key = {NULL} },
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
new file mode 100644
index 000000000..e883eca69
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -0,0 +1,134 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GFID_ACCESS_H__
+#define __GFID_ACCESS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "gfid-access-mem-types.h"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile"
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+
+#define GF_GFID_KEY "GLUSTERFS_GFID"
+#define GF_GFID_DIR ".gfid"
+#define GF_AUX_GFID 0xd
+
+#define GFID_ACCESS_GET_VALID_DIR_INODE(x,l,unref,lbl) do { \
+ int ret = 0; \
+ uint64_t value = 0; \
+ inode_t *tmp_inode = NULL; \
+ \
+ /* if its an entry operation, on the virtual */ \
+ /* directory inode as parent, we need to handle */ \
+ /* it properly */ \
+ if (l->parent) { \
+ ret = inode_ctx_get (l->parent, x, &value); \
+ if (ret) \
+ goto lbl; \
+ tmp_inode = (inode_t *)value; \
+ l->parent = inode_ref (tmp_inode); \
+ /* if parent is virtual, no need to handle */ \
+ /* loc->inode */ \
+ break; \
+ } \
+ \
+ /* if its an inode operation, on the virtual */ \
+ /* directory inode itself, we need to handle */ \
+ /* it properly */ \
+ if (l->inode) { \
+ ret = inode_ctx_get (l->inode, x, &value); \
+ if (ret) \
+ goto lbl; \
+ tmp_inode = (inode_t *)value; \
+ l->inode = inode_ref (tmp_inode); \
+ } \
+ \
+ } while (0)
+
+#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl) do { \
+ /* need to check if the lookup is on virtual dir */ \
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && \
+ ((loc->parent && \
+ __is_root_gfid (loc->parent->gfid)) || \
+ __is_root_gfid (loc->pargfid))) { \
+ err = EEXIST; \
+ goto lbl; \
+ } \
+ \
+ /* now, check if the lookup() is on an existing */ \
+ /* entry, but on gfid-path */ \
+ if ((loc->parent && \
+ __is_gfid_access_dir (loc->parent->gfid)) || \
+ __is_gfid_access_dir (loc->pargfid)) { \
+ err = EPERM; \
+ goto lbl; \
+ } \
+ } while (0)
+
+
+typedef struct {
+ unsigned int uid;
+ unsigned int gid;
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ unsigned int st_mode;
+ char *bname;
+
+ union {
+ struct _symlink_in {
+ char *linkpath;
+ } __attribute__ ((__packed__)) symlink;
+
+ struct _mknod_in {
+ unsigned int mode;
+ unsigned int rdev;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mknod;
+
+ struct _mkdir_in {
+ unsigned int mode;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mkdir;
+ } __attribute__ ((__packed__)) args;
+} __attribute__((__packed__)) ga_newfile_args_t;
+
+typedef struct {
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ char *bname; /* a null terminated basename */
+} __attribute__((__packed__)) ga_heal_args_t;
+
+struct ga_private {
+ /* root inode's stbuf */
+ struct iatt root_stbuf;
+ struct iatt gfiddir_stbuf;
+ struct mem_pool *newfile_args_pool;
+ struct mem_pool *heal_args_pool;
+};
+typedef struct ga_private ga_private_t;
+
+struct __ga_local {
+ call_frame_t *orig_frame;
+ unsigned int uid;
+ unsigned int gid;
+ loc_t loc;
+};
+typedef struct __ga_local ga_local_t;
+
+#endif /* __GFID_ACCESS_H__ */
diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am
new file mode 100644
index 000000000..060429ecf
--- /dev/null
+++ b/xlators/features/glupy/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src examples
+
+CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
new file mode 100644
index 000000000..2d7b30ef6
--- /dev/null
+++ b/xlators/features/glupy/doc/README.md
@@ -0,0 +1,44 @@
+This is just the very start for a GlusterFS[1] meta-translator that will
+allow translator code to be written in Python. It's based on the standard
+Python embedding (not extending) techniques, plus a dash of the ctypes module.
+The interface is a pretty minimal adaptation of the dispatches and callbacks
+from the C API[2] to Python, as follows:
+
+* Dispatch functions and callbacks must be defined on an "xlator" class
+ derived from gluster.Translator so that they'll be auto-registered with
+ the C translator during initialization.
+
+* For each dispatch or callback function you want to intercept, you define a
+ Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
+
+* The arguments for each operation are different, so you'll need to refer to
+ the C API. GlusterFS-specific types are used (though only loc\_t is fully
+ defined so far) and type correctness is enforced by ctypes.
+
+* If you do intercept a dispatch function, it is your responsibility to call
+ xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
+ the request to the next translator. If you do not intercept a function, it
+ will default the same way as for C (pass through to the same operation with
+ the same arguments on the first child translator).
+
+* If you intercept a callback function, it is your responsibility to call
+ xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
+ to the caller.
+
+So far only the lookup and create operations are handled this way, to support
+the "negative lookup" example. Now that the basic infrastructure is in place,
+adding more functions should be very quick, though with that much boilerplate I
+might pause to write a code generator. I also plan to add structure
+definitions and interfaces for some of the utility functions in libglusterfs
+(especially those having to do with inode and fd context) in the fairly near
+future. Note that you can also use ctypes to get at anything not explicitly
+exposed to Python already.
+
+_If you're coming here because of the Linux Journal article, please note that
+the code has evolved since that was written. The version that matches the
+article is here:_
+
+https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
+
+[1] http://www.gluster.org
+[2] http://hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
new file mode 100644
index 000000000..e05f17f49
--- /dev/null
+++ b/xlators/features/glupy/doc/TESTING
@@ -0,0 +1,9 @@
+Loading a translator written in Python using the glupy meta translator
+-------------------------------------------------------------------------------
+'test.vol' is a simple volfile with the debug-trace Python translator on top
+of a brick. The volfile can be mounted using the following command.
+
+$ glusterfs --debug -f test.vol /path/to/mntpt
+
+If then file operations are performed on the newly mounted file system, log
+output would be printed by the Python translator on the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
new file mode 100644
index 000000000..0751a488c
--- /dev/null
+++ b/xlators/features/glupy/doc/test.vol
@@ -0,0 +1,10 @@
+volume vol-posix
+ type storage/posix
+ option directory /path/to/brick
+end-volume
+
+volume vol-glupy
+ type features/glupy
+ option module-name debug-trace
+ subvolumes vol-posix
+end-volume
diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am
new file mode 100644
index 000000000..c26abeaaf
--- /dev/null
+++ b/xlators/features/glupy/examples/Makefile.am
@@ -0,0 +1,5 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+glupyexamplesdir = $(xlatordir)/glupy
+
+glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py
diff --git a/xlators/features/glupy/examples/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py
new file mode 100644
index 000000000..6eef1b58b
--- /dev/null
+++ b/xlators/features/glupy/examples/debug-trace.py
@@ -0,0 +1,775 @@
+import sys
+import stat
+from uuid import UUID
+from time import strftime, localtime
+from gluster.glupy import *
+
+# This translator was written primarily to test the fop entry point definitions
+# and structure definitions in 'glupy.py'.
+
+# It is similar to the C language debug-trace translator, which logs the
+# arguments passed to the fops and their corresponding cbk functions.
+
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+dl.get_rootunique.restype = c_uint64
+dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+
+def st_mode_from_ia (prot, filetype):
+ st_mode = 0
+ type_bit = 0
+ prot_bit = 0
+
+ if filetype == IA_IFREG:
+ type_bit = stat.S_IFREG
+ elif filetype == IA_IFDIR:
+ type_bit = stat.S_IFDIR
+ elif filetype == IA_IFLNK:
+ type_bit = stat.S_IFLNK
+ elif filetype == IA_IFBLK:
+ type_bit = stat.S_IFBLK
+ elif filetype == IA_IFCHR:
+ type_bit = stat.S_IFCHR
+ elif filetype == IA_IFIFO:
+ type_bit = stat.S_IFIFO
+ elif filetype == IA_IFSOCK:
+ type_bit = stat.S_IFSOCK
+ elif filetype == IA_INVAL:
+ pass
+
+
+ if prot.suid:
+ prot_bit |= stat.S_ISUID
+ if prot.sgid:
+ prot_bit |= stat.S_ISGID
+ if prot.sticky:
+ prot_bit |= stat.S_ISVTX
+
+ if prot.owner.read:
+ prot_bit |= stat.S_IRUSR
+ if prot.owner.write:
+ prot_bit |= stat.S_IWUSR
+ if prot.owner.execn:
+ prot_bit |= stat.S_IXUSR
+
+ if prot.group.read:
+ prot_bit |= stat.S_IRGRP
+ if prot.group.write:
+ prot_bit |= stat.S_IWGRP
+ if prot.group.execn:
+ prot_bit |= stat.S_IXGRP
+
+ if prot.other.read:
+ prot_bit |= stat.S_IROTH
+ if prot.other.write:
+ prot_bit |= stat.S_IWOTH
+ if prot.other.execn:
+ prot_bit |= stat.S_IXOTH
+
+ st_mode = (type_bit | prot_bit)
+
+ return st_mode
+
+
+def trace_stat2str (buf):
+ gfid = uuid2str(buf.contents.ia_gfid)
+ mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
+ atime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_atime))
+ mtime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_mtime))
+ ctime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_ctime))
+ return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
+ "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
+ "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
+ buf.contents.ia_nlink,
+ buf.contents.ia_uid,
+ buf.contents.ia_gid,
+ buf.contents.ia_size,
+ buf.contents.ia_blocks,
+ atime_buf, mtime_buf,
+ ctime_buf)
+
+class xlator(Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+ self.gfids = {}
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent):
+ unique =dl.get_rootunique(frame)
+ key =dl.get_id(frame)
+ if op_ret == 0:
+ gfid = uuid2str(buf.contents.ia_gfid)
+ statstr = trace_stat2str(buf)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s}; " +
+ "*postparent={4:s}").format(unique, gfid,
+ op_ret, statstr,
+ postparentstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent)
+ return 0
+
+ def create_fop(self, frame, this, loc, flags, mode, umask, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
+ "umask=0{6:o}").format(unique, gfid, loc.contents.path,
+ fd, flags, mode, umask)
+ dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode,
+ umask, fd, xdata)
+ return 0
+
+ def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret >= 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
+ "*preparent={5:s};" +
+ " *postparent={6:s}").format(unique, gfid, op_ret,
+ fd, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata)
+ return 0
+
+ def open_fop(self, frame, this, loc, flags, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}; fd={4:s}").format(unique, gfid,
+ loc.contents.path, flags,
+ fd)
+ self.gfids[key] = gfid
+ dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
+ return 0
+
+ def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
+ "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
+ op_ret, op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
+ xdata)
+ return 0
+
+ def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
+ "fd={2:s}; size ={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, size, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
+ flags, xdata)
+ return 0
+
+ def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
+ count, buf, iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret >= 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
+ op_ret,
+ statstr)
+
+ else:
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
+ vector, count, buf, iobref, xdata)
+ return 0
+
+ def writev_fop(self, frame, this, fd, vector, count, offset, flags,
+ iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}; count={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, count, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
+ offset, flags, iobref, xdata)
+ return 0
+
+ def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if op_ret >= 0:
+ preopstr = trace_stat2str(prebuf)
+ postopstr = trace_stat2str(postbuf)
+ print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
+ "*prebuf={2:s}; " +
+ "*postbuf={3:s}").format(unique, op_ret, preopstr,
+ postopstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata)
+ return 0
+
+ def opendir_fop(self, frame, this, loc, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "fd={3:s}").format(unique, gfid, loc.contents.path, fd)
+ self.gfids[key] = gfid
+ dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
+ return 0
+
+ def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
+ op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
+ fd, xdata)
+ return 0
+
+ def readdir_fop(self, frame, this, fd, size, offset, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
+ "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
+ xdata)
+ return 0
+
+ def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}").format(unique, gfid, op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
+ dictionary)
+ return 0
+
+ def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "mode={3:d}; umask=0{4:o}").format(unique, gfid,
+ loc.contents.path, mode,
+ umask)
+ dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
+ xdata)
+ return 0
+
+ def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid, op_ret,
+ statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def rmdir_fop(self, frame, this, loc, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
+ return 0
+
+ def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s}").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def stat_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
+ " path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s};").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def fstat_fop(self, frame, this, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}").format(unique, gfid, fd)
+ self.gfids[key] = gfid
+ dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
+ return 0
+
+ def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; *buf={3:s}").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique.
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def statfs_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ if loc.contents.inode:
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ else:
+ gfid = "0"
+ print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ #TBD: print buf (pointer to an iovec type object)
+ print("GLUPY TRACE STATFS CBK {0:d}: "+
+ "op_ret={1:d}").format(unique, op_ret)
+ else:
+ print("GLUPY TRACE STATFS CBK- {0:d}"+
+ "op_ret={1:d}; op_errno={2:d}").format(unique,
+ op_ret,
+ op_errno)
+ dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def getxattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " name={3:s}").format(unique, gfid, loc.contents.path,
+ name)
+ self.gfids[key]=gfid
+ dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
+ return 0
+
+ def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}; "+
+ " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
+ dictionary)
+ del self.gfids[key]
+ dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def fgetxattr_fop(self, frame, this, fd, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ "name={3:s}").format(unique, gfid, fd, name)
+ self.gfids[key] = gfid
+ dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
+ return 0
+
+ def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d};"+
+ " dictionary={4:s}").format(unique, gfid, op_ret,
+ op_errno, dictionary)
+ del self.gfids[key]
+ dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
+ flags, xdata)
+ return 0
+
+ def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+
+ "flags={3:d}").format(unique, gfid, fd, flags)
+ self.gfids[key] = gfid
+ dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
+ flags, xdata)
+ return 0
+
+ def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def removexattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}; name={3:s}").format(unique, gfid,
+ loc.contents.path,
+ name)
+ self.gfids[key] = gfid
+ dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
+ xdata)
+ return 0
+
+ def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def link_fop(self, frame, this, oldloc, newloc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if (newloc.contents.inode):
+ newgfid = uuid2str(newloc.contents.inode.contents.gfid)
+ else:
+ newgfid = "0"
+ oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
+ print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
+ "newgfid={3:s};"+
+ "newpath={4:s}").format(unique, oldgfid,
+ oldloc.contents.path,
+ newgfid,
+ newloc.contents.path)
+ self.gfids[key] = oldgfid
+ dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
+ xdata)
+ return 0
+
+ def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
+ "*stbuf={2:s}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def unlink_fop(self, frame, this, loc, xflag, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
+ "flag={3:d}").format(unique, gfid, loc.contents.path,
+ xflag)
+ self.gfids[key] = gfid
+ dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
+ xdata)
+ return 0
+
+ def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid, op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def readlink_fop(self, frame, this, loc, size, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " size={3:d}").format(unique, gfid, loc.contents.path,
+ size)
+ self.gfids[key] = gfid
+ dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
+ xdata)
+ return 0
+
+ def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ buf, stbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(stbuf)
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid,
+ op_ret, op_errno,
+ buf, statstr)
+ else:
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
+ stbuf, xdata)
+ return 0
+
+ def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
+ "linkpath={2:s}; path={3:s};"+
+ "umask=0{4:o}").format(unique, gfid, linkpath,
+ loc.contents.path, umask)
+ self.gfids[key] = gfid
+ dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
+ umask, xdata)
+ return 0
+
+ def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
+ "*postparent={5:s}").format(unique, gfid,
+ op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata)
+ return 0
diff --git a/xlators/features/glupy/examples/helloworld.py b/xlators/features/glupy/examples/helloworld.py
new file mode 100644
index 000000000..b565a4e5b
--- /dev/null
+++ b/xlators/features/glupy/examples/helloworld.py
@@ -0,0 +1,19 @@
+import sys
+from gluster.glupy import *
+
+class xlator (Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ print "Python xlator: Hello!"
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "Python xlator: Hello again!"
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent)
+ return 0
diff --git a/xlators/features/glupy/examples/negative.py b/xlators/features/glupy/examples/negative.py
new file mode 100644
index 000000000..e7a4fc07c
--- /dev/null
+++ b/xlators/features/glupy/examples/negative.py
@@ -0,0 +1,91 @@
+import sys
+from uuid import UUID
+from gluster.glupy import *
+
+# Negative-lookup-caching example. If a file wasn't there the last time we
+# looked, it's probably still not there. This translator keeps track of
+# those failed lookups for us, and returns ENOENT without needing to pass the
+# call any further for repeated requests.
+
+# If we were doing this for real, we'd need separate caches for each xlator
+# instance. The easiest way to do this would be to have xlator.__init__
+# "register" each instance in a module-global dict, with the key as the C
+# translator address and the value as the xlator object itself. For testing
+# and teaching, it's sufficient just to have one cache. The keys are parent
+# GFIDs, and the entries are lists of names within that parent that we know
+# don't exist.
+cache = {}
+
+# TBD: we need a better way of handling per-request data (frame->local in C).
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+class xlator (Translator):
+
+ def __init__ (self, c_this):
+ self.requests = {}
+ Translator.__init__(self,c_this)
+
+ def lookup_fop (self, frame, this, loc, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "lookup FOP: %s:%s" % (pargfid, loc.contents.name)
+ # Check the cache.
+ if cache.has_key(pargfid):
+ if loc.contents.name in cache[pargfid]:
+ print "short-circuiting for %s:%s" % (pargfid,
+ loc.contents.name)
+ dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None)
+ return 0
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata)
+ return 0
+
+ def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "lookup CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "found %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ elif op_errno == 2: # ENOENT
+ print "failed to find %s, adding to cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].add(name)
+ else:
+ cache[pargfid] = set([name])
+ del self.requests[key]
+ dl.unwind_lookup(frame,cookie,this,op_ret,op_errno,
+ inode,buf,xdata,postparent)
+ return 0
+
+ def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "create FOP: %s:%s" % (pargfid, loc.contents.name)
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata)
+ return 0
+
+ def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata):
+ print "create CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "created %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ del self.requests[key]
+ dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf,
+ preparent,postparent,xdata)
+ return 0
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
new file mode 100644
index 000000000..ae7b6d14d
--- /dev/null
+++ b/xlators/features/glupy/src/Makefile.am
@@ -0,0 +1,21 @@
+xlator_LTLIBRARIES = glupy.la
+
+# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+glupydir = $(xlatordir)/glupy
+AM_CPPFLAGS = $(PYTHONDEV_CPPFLAGS) $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC)
+AM_CFLAGS = $(PYTHONDEV_CFLAGS) -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS)
+
+# Flags to build glupy.so with
+glupy_la_LDFLAGS = $(PYTHONDEV_LDFLAGS) -module -avoid-version -shared -nostartfiles
+glupy_la_SOURCES = glupy.c
+glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ -lpthread -l$(BUILD_PYTHON_LIB)
+
+noinst_HEADERS = glupy.h
+
+# Install glupy.py into the Python site-packages area
+pyglupydir = $(pythondir)/gluster
+pyglupy_PYTHON = glupy.py
+
+CLEANFILES =
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
new file mode 100644
index 000000000..948b66f8d
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.c
@@ -0,0 +1,2471 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <Python.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "glupy.h"
+
+/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
+
+pthread_key_t gil_init_key;
+
+PyGILState_STATE
+glupy_enter (void)
+{
+#if 0
+ if (!pthread_getspecific(gil_init_key)) {
+ PyEval_ReleaseLock();
+ (void)pthread_setspecific(gil_init_key,(void *)1);
+ }
+#endif
+
+ return PyGILState_Ensure();
+}
+
+void
+glupy_leave (PyGILState_STATE gstate)
+{
+ PyGILState_Release(gstate);
+}
+
+/* FOP: LOOKUP */
+
+int32_t
+glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_LOOKUP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int32_t
+glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_LOOKUP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+void
+wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_lookup_cbk,xl,xl->fops->lookup,loc,xdata);
+}
+
+void
+unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(lookup,frame,op_ret,op_errno,
+ inode,buf,xdata,postparent);
+}
+
+void
+set_lookup_fop (long py_this, fop_lookup_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_LOOKUP] = (long)fop;
+}
+
+void
+set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_LOOKUP] = (long)cbk;
+}
+
+/* FOP: CREATE */
+
+int32_t
+glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_CREATE]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_CREATE]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))(
+ frame, this, loc, flags, mode, umask, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+}
+
+void
+wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_create_cbk,xl, xl->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+}
+
+void
+unwind_create (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_create_fop (long py_this, fop_create_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_CREATE] = (long)fop;
+}
+
+void
+set_create_cbk (long py_this, fop_create_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_CREATE] = (long)cbk;
+}
+
+/* FOP: OPEN */
+
+int32_t
+glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPEN]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPEN]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))(
+ frame, this, loc, flags, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+void
+wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open, loc, flags,
+ fd, xdata);
+}
+
+void
+unwind_open (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+}
+
+void
+set_open_fop (long py_this, fop_open_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_OPEN] = (long)fop;
+}
+
+void
+set_open_cbk (long py_this, fop_open_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_OPEN] = (long)cbk;
+}
+
+/* FOP: READV */
+
+int32_t
+glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))(
+ frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+ return 0;
+}
+
+int32_t
+glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))(
+ frame, this, fd, size, offset, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size,
+ offset, flags, xdata);
+}
+
+void
+unwind_readv (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+}
+
+void
+set_readv_fop (long py_this, fop_readv_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_READV] = (long)fop;
+}
+
+void
+set_readv_cbk (long py_this, fop_readv_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_READV] = (long)cbk;
+}
+
+/* FOP: WRITEV */
+
+int32_t
+glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_WRITEV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))(
+ frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_WRITEV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))(
+ frame, this, fd, vector, count, offset, flags,
+ iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+}
+
+void
+wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+}
+
+void
+unwind_writev (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
+
+void
+set_writev_fop (long py_this, fop_writev_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_WRITEV] = (long)fop;
+}
+
+void
+set_writev_cbk (long py_this, fop_writev_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_WRITEV] = (long)cbk;
+}
+
+
+/* FOP: OPENDIR */
+
+int32_t
+glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPENDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPENDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))(
+ frame, this, loc, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+void
+wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_opendir_cbk,xl,xl->fops->opendir,loc,fd,xdata);
+}
+
+void
+unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(opendir,frame,op_ret,op_errno,
+ fd,xdata);
+}
+
+void
+set_opendir_fop (long py_this, fop_opendir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_OPENDIR] = (long)fop;
+}
+
+void
+set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_OPENDIR] = (long)cbk;
+}
+
+/* FOP: READDIR */
+
+int32_t
+glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,fd, size, offset, xdata);
+ return 0;
+}
+
+void
+wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdir_cbk,xl,xl->fops->readdir,fd,size,offset,xdata);
+}
+
+void
+unwind_readdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdir,frame,op_ret,op_errno,
+ entries, xdata);
+}
+
+void
+set_readdir_fop (long py_this, fop_readdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIR] = (long)fop;
+}
+
+void
+set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIR] = (long)cbk;
+}
+
+
+/* FOP: READDIRP */
+
+int32_t
+glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIRP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIRP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,fd, size, offset, xdata);
+ return 0;
+}
+
+void
+wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdirp_cbk,xl,xl->fops->readdirp,fd,size,offset,xdata);
+}
+
+void
+unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp,frame,op_ret,op_errno,
+ entries, xdata);
+}
+
+void
+set_readdirp_fop (long py_this, fop_readdirp_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIRP] = (long)fop;
+}
+
+void
+set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIRP] = (long)cbk;
+}
+
+
+/* FOP:STAT */
+
+int32_t
+glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+void
+wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_stat_cbk,xl,xl->fops->stat,loc,xdata);
+}
+
+void
+unwind_stat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(stat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_stat_fop (long py_this, fop_stat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STAT] = (long)fop;
+}
+
+void
+set_stat_cbk (long py_this, fop_stat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STAT] = (long)cbk;
+}
+
+
+/* FOP: FSTAT */
+
+int32_t
+glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FSTAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FSTAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))(
+ frame, this, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+void
+wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_fstat_cbk,xl,xl->fops->fstat,fd,xdata);
+}
+
+void
+unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(fstat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_fstat_fop (long py_this, fop_fstat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FSTAT] = (long)fop;
+}
+
+void
+set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FSTAT] = (long)cbk;
+}
+
+/* FOP:STATFS */
+
+int32_t
+glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STATFS]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STATFS]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+
+void
+wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_statfs_cbk,xl,xl->fops->statfs,loc,xdata);
+}
+
+void
+unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(statfs,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_statfs_fop (long py_this, fop_statfs_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STATFS] = (long)fop;
+}
+
+void
+set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STATFS] = (long)cbk;
+}
+
+
+/* FOP: SETXATTR */
+
+int32_t
+glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_SETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))(
+ frame, cookie, this, op_ret, op_errno,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_SETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))(
+ frame, this, loc, dict, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr,
+ loc, dict, flags, xdata);
+}
+
+
+void
+unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_setxattr_fop (long py_this, fop_setxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_SETXATTR] = (long)fop;
+}
+
+void
+set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_SETXATTR] = (long)cbk;
+}
+
+/* FOP: GETXATTR */
+
+int32_t
+glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_GETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))(
+ frame, cookie, this, op_ret, op_errno, dict,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_GETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))(
+ frame, this, loc, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr,
+ loc, name, xdata);
+}
+
+
+void
+unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+ xdata);
+
+}
+
+
+void
+set_getxattr_fop (long py_this, fop_getxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_GETXATTR] = (long)fop;
+}
+
+
+void
+set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_GETXATTR] = (long)cbk;
+}
+
+/* FOP: FSETXATTR */
+
+int32_t
+glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FSETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))(
+ frame, cookie, this, op_ret, op_errno,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FSETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))(
+ frame, this, fd, dict, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr,
+ fd, dict, flags, xdata);
+}
+
+
+void
+unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FSETXATTR] = (long)fop;
+}
+
+void
+set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FSETXATTR] = (long)cbk;
+}
+
+/* FOP: FGETXATTR */
+
+int32_t
+glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FGETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))(
+ frame, cookie, this, op_ret, op_errno, dict,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FGETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))(
+ frame, this, fd, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr,
+ fd, name, xdata);
+}
+
+
+void
+unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+
+}
+
+
+void
+set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FGETXATTR] = (long)fop;
+}
+
+
+void
+set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
+}
+
+/* FOP:REMOVEXATTR */
+
+int32_t
+glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_REMOVEXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
+ frame, cookie, this, op_ret, op_errno, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_REMOVEXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(
+ frame, this, loc, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr,
+ loc, name, xdata);
+}
+
+
+void
+unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_removexattr_fop (long py_this, fop_removexattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
+}
+
+void
+set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP:FREMOVEXATTR */
+
+int32_t
+glupy_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FREMOVEXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
+ frame, cookie, this, op_ret, op_errno, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FREMOVEXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
+ frame, this, fd, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr,
+ fd, name, xdata);
+}
+
+
+void
+unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
+}
+
+void
+set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: LINK*/
+int32_t
+glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_LINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_LINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(
+ frame, this, oldloc, newloc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+void
+wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link,
+ oldloc, newloc, xdata);
+}
+
+void
+unwind_link (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_link_fop (long py_this, fop_link_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_LINK] = (long)fop;
+}
+
+void
+set_link_cbk (long py_this, fop_link_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_LINK] = (long)cbk;
+}
+
+/* FOP: SYMLINK*/
+int32_t
+glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_SYMLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_SYMLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(
+ frame, this, linkname, loc, umask, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc,
+ umask, xdata);
+ return 0;
+}
+
+void
+wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_symlink_cbk, xl, xl->fops->symlink,
+ linkname, loc, umask, xdata);
+}
+
+void
+unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_symlink_fop (long py_this, fop_symlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_SYMLINK] = (long)fop;
+}
+
+void
+set_symlink_cbk (long py_this, fop_symlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_SYMLINK] = (long)cbk;
+}
+
+
+/* FOP: READLINK */
+int32_t
+glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ path, buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path,
+ buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(
+ frame, this, loc, size, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc,
+ size, xdata);
+ return 0;
+}
+
+void
+wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink,
+ loc, size, xdata);
+}
+
+void
+unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
+ xdata);
+}
+
+void
+set_readlink_fop (long py_this, fop_readlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READLINK] = (long)fop;
+}
+
+void
+set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READLINK] = (long)cbk;
+}
+
+
+/* FOP: UNLINK */
+
+int32_t
+glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_UNLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_UNLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(
+ frame, this, loc, xflags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc,
+ xflags, xdata);
+ return 0;
+}
+
+void
+wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink,
+ loc, xflags, xdata);
+}
+
+void
+unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+}
+
+void
+set_unlink_fop (long py_this, fop_unlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_UNLINK] = (long)fop;
+}
+
+void
+set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_UNLINK] = (long)cbk;
+}
+
+
+/* FOP: MKDIR */
+
+int32_t
+glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_MKDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_MKDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(
+ frame, this, loc, mode, umask, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+ return 0;
+}
+
+void
+wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir,
+ loc, mode, umask, xdata);
+}
+
+void
+unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_mkdir_fop (long py_this, fop_mkdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_MKDIR] = (long)fop;
+}
+
+void
+set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_MKDIR] = (long)cbk;
+}
+
+
+/* FOP: RMDIR */
+
+int32_t
+glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_RMDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_RMDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(
+ frame, this, loc, xflags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc,
+ xflags, xdata);
+ return 0;
+}
+
+void
+wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir,
+ loc, xflags, xdata);
+}
+
+void
+unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+}
+
+void
+set_rmdir_fop (long py_this, fop_rmdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_RMDIR] = (long)fop;
+}
+
+void
+set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_RMDIR] = (long)cbk;
+}
+
+
+/* NON-FOP-SPECIFIC CODE */
+
+
+long
+get_id (call_frame_t *frame)
+{
+ return (long)(frame->local);
+}
+
+uint64_t
+get_rootunique (call_frame_t *frame)
+{
+ return frame->root->unique;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ glupy_private_t *priv = NULL;
+ char *module_name = NULL;
+ PyObject *py_mod_name = NULL;
+ PyObject *py_init_func = NULL;
+ PyObject *py_args = NULL;
+ PyObject *syspath = NULL;
+ PyObject *path = NULL;
+ static gf_boolean_t py_inited = _gf_false;
+ void * err_cleanup = &&err_return;
+
+ if (dict_get_str(this->options,"module-name",&module_name) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "missing module-name");
+ return -1;
+ }
+
+ priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv);
+ if (!priv) {
+ goto *err_cleanup;
+ }
+ this->private = priv;
+ err_cleanup = &&err_free_priv;
+
+ if (!py_inited) {
+ Py_Initialize();
+ PyEval_InitThreads();
+#if 0
+ (void)pthread_key_create(&gil_init_key,NULL);
+ (void)pthread_setspecific(gil_init_key,(void *)1);
+#endif
+ /* PyEval_InitThreads takes this "for" us. No thanks. */
+ PyEval_ReleaseLock();
+ py_inited = _gf_true;
+ }
+
+ /* Adjust python's path */
+ syspath = PySys_GetObject("path");
+ path = PyString_FromString(GLUSTER_PYTHON_PATH);
+ PyList_Append(syspath, path);
+ Py_DECREF(path);
+
+ py_mod_name = PyString_FromString(module_name);
+ if (!py_mod_name) {
+ gf_log (this->name, GF_LOG_ERROR, "could not create name");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name);
+ priv->py_module = PyImport_Import(py_mod_name);
+ Py_DECREF(py_mod_name);
+ if (!priv->py_module) {
+ gf_log (this->name, GF_LOG_ERROR, "Python import failed");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ gf_log (this->name, GF_LOG_INFO, "Import of %s succeeded", module_name);
+ err_cleanup = &&err_deref_module;
+
+ py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
+ if (!py_init_func || !PyCallable_Check(py_init_func)) {
+ gf_log (this->name, GF_LOG_ERROR, "missing init func");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ err_cleanup = &&err_deref_init;
+
+ py_args = PyTuple_New(1);
+ if (!py_args) {
+ gf_log (this->name, GF_LOG_ERROR, "could not create args");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ PyTuple_SetItem(py_args,0,PyLong_FromLong((long)this));
+
+ /* TBD: pass in list of children */
+ priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
+ Py_DECREF(py_args);
+ if (!priv->py_xlator) {
+ gf_log (this->name, GF_LOG_ERROR, "Python init failed");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator);
+
+ return 0;
+
+err_deref_init:
+ Py_DECREF(py_init_func);
+err_deref_module:
+ Py_DECREF(priv->py_module);
+err_free_priv:
+ GF_FREE(priv);
+err_return:
+ return -1;
+}
+
+void
+fini (xlator_t *this)
+{
+ glupy_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ Py_DECREF(priv->py_xlator);
+ Py_DECREF(priv->py_module);
+ this->private = NULL;
+ GF_FREE (priv);
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .lookup = glupy_lookup,
+ .create = glupy_create,
+ .open = glupy_open,
+ .readv = glupy_readv,
+ .writev = glupy_writev,
+ .opendir = glupy_opendir,
+ .readdir = glupy_readdir,
+ .stat = glupy_stat,
+ .fstat = glupy_fstat,
+ .setxattr = glupy_setxattr,
+ .getxattr = glupy_getxattr,
+ .fsetxattr = glupy_fsetxattr,
+ .fgetxattr = glupy_fgetxattr,
+ .removexattr = glupy_removexattr,
+ .fremovexattr = glupy_fremovexattr,
+ .link = glupy_link,
+ .unlink = glupy_unlink,
+ .readlink = glupy_readlink,
+ .symlink = glupy_symlink,
+ .mkdir = glupy_mkdir,
+ .rmdir = glupy_rmdir,
+ .statfs = glupy_statfs,
+ .readdirp = glupy_readdirp
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
new file mode 100644
index 000000000..8661fce88
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.h
@@ -0,0 +1,69 @@
+/*
+ Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __GLUPY_H__
+#define __GLUPY_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "mem-types.h"
+
+enum {
+ GLUPY_LOOKUP = 0,
+ GLUPY_CREATE,
+ GLUPY_OPEN,
+ GLUPY_READV,
+ GLUPY_WRITEV,
+ GLUPY_OPENDIR,
+ GLUPY_READDIR,
+ GLUPY_READDIRP,
+ GLUPY_STAT,
+ GLUPY_FSTAT,
+ GLUPY_STATFS,
+ GLUPY_SETXATTR,
+ GLUPY_GETXATTR,
+ GLUPY_FSETXATTR,
+ GLUPY_FGETXATTR,
+ GLUPY_REMOVEXATTR,
+ GLUPY_FREMOVEXATTR,
+ GLUPY_LINK,
+ GLUPY_UNLINK,
+ GLUPY_READLINK,
+ GLUPY_SYMLINK,
+ GLUPY_MKNOD,
+ GLUPY_MKDIR,
+ GLUPY_RMDIR,
+ GLUPY_N_FUNCS
+};
+
+typedef struct {
+ PyObject *py_module;
+ PyObject *py_xlator;
+ long fops[GLUPY_N_FUNCS];
+ long cbks[GLUPY_N_FUNCS];
+} glupy_private_t;
+
+enum gf_glupy_mem_types_ {
+ gf_glupy_mt_priv = gf_common_mt_end + 1,
+ gf_glupy_mt_end
+};
+
+#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/glupy.py b/xlators/features/glupy/src/glupy.py
new file mode 100644
index 000000000..a5daa77d3
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.py
@@ -0,0 +1,841 @@
+import sys
+from ctypes import *
+
+dl = CDLL("",RTLD_GLOBAL)
+
+
+class call_frame_t (Structure):
+ pass
+
+class dev_t (Structure):
+ pass
+
+
+class dict_t (Structure):
+ pass
+
+
+class gf_dirent_t (Structure):
+ pass
+
+
+class iobref_t (Structure):
+ pass
+
+
+class iovec_t (Structure):
+ pass
+
+
+class list_head (Structure):
+ pass
+
+list_head._fields_ = [
+ ("next", POINTER(list_head)),
+ ("prev", POINTER(list_head))
+ ]
+
+
+class rwxperm_t (Structure):
+ _fields_ = [
+ ("read", c_uint8, 1),
+ ("write", c_uint8, 1),
+ ("execn", c_uint8, 1)
+ ]
+
+
+class statvfs_t (Structure):
+ pass
+
+
+class xlator_t (Structure):
+ pass
+
+
+class ia_prot_t (Structure):
+ _fields_ = [
+ ("suid", c_uint8, 1),
+ ("sgid", c_uint8, 1),
+ ("sticky", c_uint8, 1),
+ ("owner", rwxperm_t),
+ ("group", rwxperm_t),
+ ("other", rwxperm_t)
+ ]
+
+# For checking file type.
+(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
+ IA_IFSOCK) = xrange(8)
+
+
+class iatt_t (Structure):
+ _fields_ = [
+ ("ia_no", c_uint64),
+ ("ia_gfid", c_ubyte * 16),
+ ("ia_dev", c_uint64),
+ ("ia_type", c_uint),
+ ("ia_prot", ia_prot_t),
+ ("ia_nlink", c_uint32),
+ ("ia_uid", c_uint32),
+ ("ia_gid", c_uint32),
+ ("ia_rdev", c_uint64),
+ ("ia_size", c_uint64),
+ ("ia_blksize", c_uint32),
+ ("ia_blocks", c_uint64),
+ ("ia_atime", c_uint32 ),
+ ("ia_atime_nsec", c_uint32),
+ ("ia_mtime", c_uint32),
+ ("ia_mtime_nsec", c_uint32),
+ ("ia_ctime", c_uint32),
+ ("ia_ctime_nsec", c_uint32)
+ ]
+
+
+class mem_pool (Structure):
+ _fields_ = [
+ ("list", list_head),
+ ("hot_count", c_int),
+ ("cold_count", c_int),
+ ("lock", c_void_p),
+ ("padded_sizeof_type", c_ulong),
+ ("pool", c_void_p),
+ ("pool_end", c_void_p),
+ ("real_sizeof_type", c_int),
+ ("alloc_count", c_uint64),
+ ("pool_misses", c_uint64),
+ ("max_alloc", c_int),
+ ("curr_stdalloc", c_int),
+ ("max_stdalloc", c_int),
+ ("name", c_char_p),
+ ("global_list", list_head)
+ ]
+
+
+class U_ctx_key_inode (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", POINTER(xlator_t))
+ ]
+
+
+class U_ctx_value1 (Union):
+ _fields_ = [
+ ("value1", c_uint64),
+ ("ptr1", c_void_p)
+ ]
+
+
+class U_ctx_value2 (Union):
+ _fields_ = [
+ ("value2", c_uint64),
+ ("ptr2", c_void_p)
+ ]
+
+class inode_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1","u_value2",)
+ _fields_ = [
+ ("u_key", U_ctx_key_inode),
+ ("u_value1", U_ctx_value1),
+ ("u_value2", U_ctx_value2)
+ ]
+
+class inode_t (Structure):
+ pass
+
+class inode_table_t (Structure):
+ _fields_ = [
+ ("lock", c_void_p),
+ ("hashsize", c_size_t),
+ ("name", c_char_p),
+ ("root", POINTER(inode_t)),
+ ("xl", POINTER(xlator_t)),
+ ("lru_limit", c_uint32),
+ ("inode_hash", POINTER(list_head)),
+ ("name_hash", POINTER(list_head)),
+ ("active", list_head),
+ ("active_size", c_uint32),
+ ("lru", list_head),
+ ("lru_size", c_uint32),
+ ("purge", list_head),
+ ("purge_size", c_uint32),
+ ("inode_pool", POINTER(mem_pool)),
+ ("dentry_pool", POINTER(mem_pool)),
+ ("fd_mem_pool", POINTER(mem_pool))
+ ]
+
+inode_t._fields_ = [
+ ("table", POINTER(inode_table_t)),
+ ("gfid", c_ubyte * 16),
+ ("lock", c_void_p),
+ ("nlookup", c_uint64),
+ ("fd_count", c_uint32),
+ ("ref", c_uint32),
+ ("ia_type", c_uint),
+ ("fd_list", list_head),
+ ("dentry_list", list_head),
+ ("hashv", list_head),
+ ("listv", list_head),
+ ("ctx", POINTER(inode_ctx))
+ ]
+
+
+
+class U_ctx_key_fd (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", c_void_p)
+ ]
+
+class fd_lk_ctx (Structure):
+ _fields_ = [
+ ("lk_list", list_head),
+ ("ref", c_int),
+ ("lock", c_void_p)
+ ]
+
+class fd_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1")
+ _fields_ = [
+ ("u_key", U_ctx_key_fd),
+ ("u_value1", U_ctx_value1)
+ ]
+
+class fd_t (Structure):
+ _fields_ = [
+ ("pid", c_uint64),
+ ("flags", c_int32),
+ ("refcount", c_int32),
+ ("inode_list", list_head),
+ ("inode", POINTER(inode_t)),
+ ("lock", c_void_p),
+ ("ctx", POINTER(fd_ctx)),
+ ("xl_count", c_int),
+ ("lk_ctx", POINTER(fd_lk_ctx)),
+ ("anonymous", c_uint)
+ ]
+
+class loc_t (Structure):
+ _fields_ = [
+ ("path", c_char_p),
+ ("name", c_char_p),
+ ("inode", POINTER(inode_t)),
+ ("parent", POINTER(inode_t)),
+ ("gfid", c_ubyte * 16),
+ ("pargfid", c_ubyte * 16),
+ ]
+
+
+
+def _init_op (a_class, fop, cbk, wind, unwind):
+ # Decorators, used by translators. We could pass the signatures as
+ # parameters, but it's actually kind of nice to keep them around for
+ # inspection.
+ a_class.fop_type = apply(CFUNCTYPE,a_class.fop_sig)
+ a_class.cbk_type = apply(CFUNCTYPE,a_class.cbk_sig)
+ # Dispatch-function registration.
+ fop.restype = None
+ fop.argtypes = [ c_long, a_class.fop_type ]
+ # Callback-function registration.
+ cbk.restype = None
+ cbk.argtypes = [ c_long, a_class.cbk_type ]
+ # STACK_WIND function.
+ wind.restype = None
+ wind.argtypes = list(a_class.fop_sig[1:])
+ # STACK_UNWIND function.
+ unwind.restype = None
+ unwind.argtypes = list(a_class.cbk_sig[1:])
+
+class OpLookup:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(dict_t), POINTER(iatt_t))
+_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
+ dl.wind_lookup, dl.unwind_lookup)
+
+class OpCreate:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
+ POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(inode_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
+ dl.wind_create, dl.unwind_create)
+
+class OpOpen:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
+ dl.wind_open, dl.unwind_open)
+
+class OpReadv:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
+ POINTER(iobref_t), POINTER(dict_t))
+_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
+ dl.wind_readv, dl.unwind_readv)
+class OpWritev:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
+ POINTER(iobref_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
+ dl.wind_writev, dl.unwind_writev)
+
+class OpOpendir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
+ dl.wind_opendir, dl.unwind_opendir)
+
+class OpReaddir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
+ dl.wind_readdir, dl.unwind_readdir)
+
+class OpReaddirp:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
+ dl.wind_readdirp, dl.unwind_readdirp)
+
+class OpStat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
+ dl.wind_stat, dl.unwind_stat)
+
+class OpFstat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
+ dl.wind_fstat, dl.unwind_fstat)
+
+class OpStatfs:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
+_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
+ dl.wind_statfs, dl.unwind_statfs)
+
+
+class OpSetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
+ dl.wind_setxattr, dl.unwind_setxattr)
+
+class OpGetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
+ dl.wind_getxattr, dl.unwind_getxattr)
+
+class OpFsetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
+ dl.wind_fsetxattr, dl.unwind_fsetxattr)
+
+class OpFgetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
+ dl.wind_fgetxattr, dl.unwind_fgetxattr)
+
+class OpRemovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
+ dl.wind_removexattr, dl.unwind_removexattr)
+
+
+class OpFremovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
+ dl.wind_fremovexattr, dl.unwind_fremovexattr)
+
+class OpLink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
+ dl.wind_link, dl.unwind_link)
+
+class OpSymlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
+ dl.wind_symlink, dl.unwind_symlink)
+
+class OpUnlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
+ dl.wind_unlink, dl.unwind_unlink)
+
+class OpReadlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_size_t, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
+ dl.wind_readlink, dl.unwind_readlink)
+
+class OpMkdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
+ dl.wind_mkdir, dl.unwind_mkdir)
+
+class OpRmdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
+ dl.wind_rmdir, dl.unwind_rmdir)
+
+
+class Translator:
+ def __init__ (self, c_this):
+ # This is only here to keep references to the stubs we create,
+ # because ctypes doesn't and glupy.so can't because it doesn't
+ # get a pointer to the actual Python object. It's a dictionary
+ # instead of a list in case we ever allow changing fops/cbks
+ # after initialization and need to look them up.
+ self.stub_refs = {}
+ funcs = dir(self.__class__)
+ if "lookup_fop" in funcs:
+ @OpLookup.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.lookup_fop (frame, this, loc, xdata)
+ self.stub_refs["lookup_fop"] = stub
+ dl.set_lookup_fop(c_this,stub)
+ if "lookup_cbk" in funcs:
+ @OpLookup.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, xdata, postparent, s=self):
+ return s.lookup_cbk(frame, cookie, this, op_ret,
+ op_errno, inode, buf, xdata,
+ postparent)
+ self.stub_refs["lookup_cbk"] = stub
+ dl.set_lookup_cbk(c_this,stub)
+ if "create_fop" in funcs:
+ @OpCreate.fop_type
+ def stub (frame, this, loc, flags, mode, umask, fd,
+ xdata, s=self):
+ return s.create_fop (frame, this, loc, flags,
+ mode, umask, fd, xdata)
+ self.stub_refs["create_fop"] = stub
+ dl.set_create_fop(c_this,stub)
+ if "create_cbk" in funcs:
+ @OpCreate.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.create_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ inode, buf, preparent,
+ postparent, xdata)
+ self.stub_refs["create_cbk"] = stub
+ dl.set_create_cbk(c_this,stub)
+ if "open_fop" in funcs:
+ @OpOpen.fop_type
+ def stub (frame, this, loc, flags, fd,
+ xdata, s=self):
+ return s.open_fop (frame, this, loc, flags,
+ fd, xdata)
+ self.stub_refs["open_fop"] = stub
+ dl.set_open_fop(c_this,stub)
+ if "open_cbk" in funcs:
+ @OpOpen.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.open_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["open_cbk"] = stub
+ dl.set_open_cbk(c_this,stub)
+ if "readv_fop" in funcs:
+ @OpReadv.fop_type
+ def stub (frame, this, fd, size, offset, flags,
+ xdata, s=self):
+ return s.readv_fop (frame, this, fd, size,
+ offset, flags, xdata)
+ self.stub_refs["readv_fop"] = stub
+ dl.set_readv_fop(c_this,stub)
+ if "readv_cbk" in funcs:
+ @OpReadv.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata,
+ s=self):
+ return s.readv_cbk (frame, cookie, this,
+ op_ret, op_errno, vector,
+ count, stbuf, iobref,
+ xdata)
+ self.stub_refs["readv_cbk"] = stub
+ dl.set_readv_cbk(c_this,stub)
+ if "writev_fop" in funcs:
+ @OpWritev.fop_type
+ def stub (frame, this, fd, vector, count,
+ offset, flags, iobref, xdata, s=self):
+ return s.writev_fop (frame, this, fd, vector,
+ count, offset, flags,
+ iobref, xdata)
+ self.stub_refs["writev_fop"] = stub
+ dl.set_writev_fop(c_this,stub)
+ if "writev_cbk" in funcs:
+ @OpWritev.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata, s=self):
+ return s.writev_cbk (frame, cookie, this,
+ op_ret, op_errno, prebuf,
+ postbuf, xdata)
+ self.stub_refs["writev_cbk"] = stub
+ dl.set_writev_cbk(c_this,stub)
+ if "opendir_fop" in funcs:
+ @OpOpendir.fop_type
+ def stub (frame, this, loc, fd, xdata, s=self):
+ return s.opendir_fop (frame, this, loc, fd,
+ xdata)
+ self.stub_refs["opendir_fop"] = stub
+ dl.set_opendir_fop(c_this,stub)
+ if "opendir_cbk" in funcs:
+ @OpOpendir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.opendir_cbk(frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["opendir_cbk"] = stub
+ dl.set_opendir_cbk(c_this,stub)
+ if "readdir_fop" in funcs:
+ @OpReaddir.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdir_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdir_fop"] = stub
+ dl.set_readdir_fop(c_this,stub)
+ if "readdir_cbk" in funcs:
+ @OpReaddir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdir_cbk(frame, cookie, this,
+ op_ret, op_errno, entries,
+ xdata)
+ self.stub_refs["readdir_cbk"] = stub
+ dl.set_readdir_cbk(c_this,stub)
+ if "readdirp_fop" in funcs:
+ @OpReaddirp.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdirp_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdirp_fop"] = stub
+ dl.set_readdirp_fop(c_this,stub)
+ if "readdirp_cbk" in funcs:
+ @OpReaddirp.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdirp_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ entries, xdata)
+ self.stub_refs["readdirp_cbk"] = stub
+ dl.set_readdirp_cbk(c_this,stub)
+ if "stat_fop" in funcs:
+ @OpStat.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.stat_fop (frame, this, loc, xdata)
+ self.stub_refs["stat_fop"] = stub
+ dl.set_stat_fop(c_this,stub)
+ if "stat_cbk" in funcs:
+ @OpStat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.stat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["stat_cbk"] = stub
+ dl.set_stat_cbk(c_this,stub)
+ if "fstat_fop" in funcs:
+ @OpFstat.fop_type
+ def stub (frame, this, fd, xdata, s=self):
+ return s.fstat_fop (frame, this, fd, xdata)
+ self.stub_refs["fstat_fop"] = stub
+ dl.set_fstat_fop(c_this,stub)
+ if "fstat_cbk" in funcs:
+ @OpFstat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.fstat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["fstat_cbk"] = stub
+ dl.set_fstat_cbk(c_this,stub)
+ if "statfs_fop" in funcs:
+ @OpStatfs.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.statfs_fop (frame, this, loc, xdata)
+ self.stub_refs["statfs_fop"] = stub
+ dl.set_statfs_fop(c_this,stub)
+ if "statfs_cbk" in funcs:
+ @OpStatfs.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.statfs_cbk (frame, cookie, this,
+ op_ret, op_errno, buf,
+ xdata)
+ self.stub_refs["statfs_cbk"] = stub
+ dl.set_statfs_cbk(c_this,stub)
+ if "setxattr_fop" in funcs:
+ @OpSetxattr.fop_type
+ def stub (frame, this, loc, dictionary, flags, xdata,
+ s=self):
+ return s.setxattr_fop (frame, this, loc,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["setxattr_fop"] = stub
+ dl.set_setxattr_fop(c_this,stub)
+ if "setxattr_cbk" in funcs:
+ @OpSetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.setxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["setxattr_cbk"] = stub
+ dl.set_setxattr_cbk(c_this,stub)
+ if "getxattr_fop" in funcs:
+ @OpGetxattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.getxattr_fop (frame, this, loc, name,
+ xdata)
+ self.stub_refs["getxattr_fop"] = stub
+ dl.set_getxattr_fop(c_this,stub)
+ if "getxattr_cbk" in funcs:
+ @OpGetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.getxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["getxattr_cbk"] = stub
+ dl.set_getxattr_cbk(c_this,stub)
+ if "fsetxattr_fop" in funcs:
+ @OpFsetxattr.fop_type
+ def stub (frame, this, fd, dictionary, flags, xdata,
+ s=self):
+ return s.fsetxattr_fop (frame, this, fd,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["fsetxattr_fop"] = stub
+ dl.set_fsetxattr_fop(c_this,stub)
+ if "fsetxattr_cbk" in funcs:
+ @OpFsetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.fsetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["fsetxattr_cbk"] = stub
+ dl.set_fsetxattr_cbk(c_this,stub)
+ if "fgetxattr_fop" in funcs:
+ @OpFgetxattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fgetxattr_fop (frame, this, fd, name,
+ xdata)
+ self.stub_refs["fgetxattr_fop"] = stub
+ dl.set_fgetxattr_fop(c_this,stub)
+ if "fgetxattr_cbk" in funcs:
+ @OpFgetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.fgetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["fgetxattr_cbk"] = stub
+ dl.set_fgetxattr_cbk(c_this,stub)
+ if "removexattr_fop" in funcs:
+ @OpRemovexattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.removexattr_fop (frame, this, loc,
+ name, xdata)
+ self.stub_refs["removexattr_fop"] = stub
+ dl.set_removexattr_fop(c_this,stub)
+ if "removexattr_cbk" in funcs:
+ @OpRemovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.removexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["removexattr_cbk"] = stub
+ dl.set_removexattr_cbk(c_this,stub)
+ if "fremovexattr_fop" in funcs:
+ @OpFremovexattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fremovexattr_fop (frame, this, fd,
+ name, xdata)
+ self.stub_refs["fremovexattr_fop"] = stub
+ dl.set_fremovexattr_fop(c_this,stub)
+ if "fremovexattr_cbk" in funcs:
+ @OpFremovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.fremovexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["fremovexattr_cbk"] = stub
+ dl.set_fremovexattr_cbk(c_this,stub)
+ if "link_fop" in funcs:
+ @OpLink.fop_type
+ def stub (frame, this, oldloc, newloc,
+ xdata, s=self):
+ return s.link_fop (frame, this, oldloc,
+ newloc, xdata)
+ self.stub_refs["link_fop"] = stub
+ dl.set_link_fop(c_this,stub)
+ if "link_cbk" in funcs:
+ @OpLink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.link_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["link_cbk"] = stub
+ dl.set_link_cbk(c_this,stub)
+ if "symlink_fop" in funcs:
+ @OpSymlink.fop_type
+ def stub (frame, this, linkname, loc,
+ umask, xdata, s=self):
+ return s.symlink_fop (frame, this, linkname,
+ loc, umask, xdata)
+ self.stub_refs["symlink_fop"] = stub
+ dl.set_symlink_fop(c_this,stub)
+ if "symlink_cbk" in funcs:
+ @OpSymlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.symlink_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["symlink_cbk"] = stub
+ dl.set_symlink_cbk(c_this,stub)
+ if "unlink_fop" in funcs:
+ @OpUnlink.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.unlink_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["unlink_fop"] = stub
+ dl.set_unlink_fop(c_this,stub)
+ if "unlink_cbk" in funcs:
+ @OpUnlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.unlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["unlink_cbk"] = stub
+ dl.set_unlink_cbk(c_this,stub)
+ if "readlink_fop" in funcs:
+ @OpReadlink.fop_type
+ def stub (frame, this, loc, size,
+ xdata, s=self):
+ return s.readlink_fop (frame, this, loc,
+ size, xdata)
+ self.stub_refs["readlink_fop"] = stub
+ dl.set_readlink_fop(c_this,stub)
+ if "readlink_cbk" in funcs:
+ @OpReadlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ path, buf, xdata, s=self):
+ return s.readlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ path, buf, xdata)
+ self.stub_refs["readlink_cbk"] = stub
+ dl.set_readlink_cbk(c_this,stub)
+ if "mkdir_fop" in funcs:
+ @OpMkdir.fop_type
+ def stub (frame, this, loc, mode, umask, xdata,
+ s=self):
+ return s.mkdir_fop (frame, this, loc, mode,
+ umask, xdata)
+ self.stub_refs["mkdir_fop"] = stub
+ dl.set_mkdir_fop(c_this,stub)
+ if "mkdir_cbk" in funcs:
+ @OpMkdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata, s=self):
+ return s.mkdir_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["mkdir_cbk"] = stub
+ dl.set_mkdir_cbk(c_this,stub)
+ if "rmdir_fop" in funcs:
+ @OpRmdir.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.rmdir_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["rmdir_fop"] = stub
+ dl.set_rmdir_fop(c_this,stub)
+ if "rmdir_cbk" in funcs:
+ @OpRmdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.rmdir_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["rmdir_cbk"] = stub
+ dl.set_rmdir_cbk(c_this,stub)
diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in
new file mode 100644
index 000000000..1aea9875f
--- /dev/null
+++ b/xlators/features/glupy/src/setup.py.in
@@ -0,0 +1,24 @@
+from distutils.core import setup
+
+DESC = """GlusterFS is a clustered file-system capable of scaling to
+several petabytes. It aggregates various storage bricks over Infiniband
+RDMA or TCP/IP interconnect into one large parallel network file system.
+GlusterFS is one of the most sophisticated file systems in terms of
+features and extensibility. It borrows a powerful concept called
+Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
+user space and easily manageable.
+
+This package contains Glupy, the Python translator interface for GlusterFS."""
+
+setup(
+ name='glusterfs-glupy',
+ version='@PACKAGE_VERSION@',
+ description='Glupy is the Python translator interface for GlusterFS',
+ long_description=DESC,
+ author='Gluster Community',
+ author_email='gluster-devel@nongnu.org',
+ license='LGPLv3',
+ url='http://gluster.org/',
+ package_dir={'gluster':''},
+ packages=['gluster']
+)
diff --git a/xlators/protocol/legacy/server/Makefile.am b/xlators/features/index/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/xlators/protocol/legacy/server/Makefile.am
+++ b/xlators/features/index/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am
new file mode 100644
index 000000000..73bb8972e
--- /dev/null
+++ b/xlators/features/index/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = index.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+index_la_LDFLAGS = -module -avoid-version
+
+index_la_SOURCES = index.c
+index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = index.h index-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
new file mode 100644
index 000000000..553d492df
--- /dev/null
+++ b/xlators/features/index/src/index-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_MEM_TYPES_H__
+#define __QUIESCE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_index_mem_types_ {
+ gf_index_mt_priv_t = gf_common_mt_end + 1,
+ gf_index_inode_ctx_t = gf_common_mt_end + 2,
+ gf_index_fd_ctx_t = gf_common_mt_end + 3,
+ gf_index_mt_end
+};
+#endif
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
new file mode 100644
index 000000000..5c1c65fbd
--- /dev/null
+++ b/xlators/features/index/src/index.c
@@ -0,0 +1,1275 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "index.h"
+#include "options.h"
+#include "glusterfs3-xdr.h"
+#include "syscall.h"
+
+#define XATTROP_SUBDIR "xattrop"
+
+call_stub_t *
+__index_dequeue (struct list_head *callstubs)
+{
+ call_stub_t *stub = NULL;
+
+ if (!list_empty (callstubs)) {
+ stub = list_entry (callstubs->next, call_stub_t, list);
+ list_del_init (&stub->list);
+ }
+
+ return stub;
+}
+
+inline static void
+__index_enqueue (struct list_head *callstubs, call_stub_t *stub)
+{
+ list_add_tail (&stub->list, callstubs);
+}
+
+static void
+worker_enqueue (xlator_t *this, call_stub_t *stub)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ pthread_mutex_lock (&priv->mutex);
+ {
+ __index_enqueue (&priv->callstubs, stub);
+ pthread_cond_signal (&priv->cond);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+}
+
+void *
+index_worker (void *data)
+{
+ index_priv_t *priv = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ int ret = 0;
+
+ THIS = data;
+ this = data;
+ priv = this->private;
+
+ for (;;) {
+ pthread_mutex_lock (&priv->mutex);
+ {
+ while (list_empty (&priv->callstubs)) {
+ ret = pthread_cond_wait (&priv->cond,
+ &priv->mutex);
+ }
+
+ stub = __index_dequeue (&priv->callstubs);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+ if (stub) /* guard against spurious wakeups */
+ call_resume (stub);
+ }
+
+ return NULL;
+}
+int
+__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+ int ret = 0;
+ index_inode_ctx_t *ictx = NULL;
+ uint64_t tmpctx = 0;
+
+ ret = __inode_ctx_get (inode, this, &tmpctx);
+ if (!ret) {
+ ictx = (index_inode_ctx_t*) (long) tmpctx;
+ goto out;
+ }
+ ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t);
+ if (!ictx) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&ictx->callstubs);
+ ret = __inode_ctx_put (inode, this, (uint64_t)ictx);
+ if (ret) {
+ GF_FREE (ictx);
+ ictx = NULL;
+ goto out;
+ }
+out:
+ if (ictx)
+ *ctx = ictx;
+ return ret;
+}
+
+int
+index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+ int ret = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+static void
+make_index_dir_path (char *base, const char *subdir,
+ char *index_dir, size_t len)
+{
+ snprintf (index_dir, len, "%s/%s", base, subdir);
+}
+
+int
+index_dir_create (xlator_t *this, const char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char fullpath[PATH_MAX] = {0};
+ char path[PATH_MAX] = {0};
+ char *dir = NULL;
+ index_priv_t *priv = NULL;
+ size_t len = 0;
+ size_t pathlen = 0;
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, subdir, fullpath,
+ sizeof (fullpath));
+ ret = stat (fullpath, &st);
+ if (!ret) {
+ if (!S_ISDIR (st.st_mode))
+ ret = -2;
+ goto out;
+ }
+
+ pathlen = strlen (fullpath);
+ if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
+ fullpath[pathlen - 1] = '\0';
+ dir = strchr (fullpath, '/');
+ while (dir) {
+ dir = strchr (dir + 1, '/');
+ if (dir)
+ len = pathlen - strlen (dir);
+ else
+ len = pathlen;
+ strncpy (path, fullpath, len);
+ path[len] = '\0';
+ ret = mkdir (path, 0600);
+ if (ret && (errno != EEXIST))
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to "
+ "create (%s)", priv->index_basepath, subdir,
+ strerror (errno));
+ } else if (ret == -2) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, "
+ "path exists, not a directory ", priv->index_basepath,
+ subdir);
+ }
+ return ret;
+}
+
+void
+index_get_index (index_priv_t *priv, uuid_t index)
+{
+ LOCK (&priv->lock);
+ {
+ uuid_copy (index, priv->index);
+ }
+ UNLOCK (&priv->lock);
+}
+
+void
+index_generate_index (index_priv_t *priv, uuid_t index)
+{
+ LOCK (&priv->lock);
+ {
+ //To prevent duplicate generates.
+ //This method fails if number of contending threads is greater
+ //than MAX_LINK count of the fs
+ if (!uuid_compare (priv->index, index))
+ uuid_generate (priv->index);
+ uuid_copy (index, priv->index);
+ }
+ UNLOCK (&priv->lock);
+}
+
+static void
+make_index_path (char *base, const char *subdir, uuid_t index,
+ char *index_path, size_t len)
+{
+ make_index_dir_path (base, subdir, index_path, len);
+ snprintf (index_path + strlen (index_path), len - strlen (index_path),
+ "/%s-%s", subdir, uuid_utoa (index));
+}
+
+static void
+make_gfid_path (char *base, const char *subdir, uuid_t gfid,
+ char *gfid_path, size_t len)
+{
+ make_index_dir_path (base, subdir, gfid_path, len);
+ snprintf (gfid_path + strlen (gfid_path), len - strlen (gfid_path),
+ "/%s", uuid_utoa (gfid));
+}
+
+static void
+make_file_path (char *base, const char *subdir, const char *filename,
+ char *file_path, size_t len)
+{
+ make_index_dir_path (base, subdir, file_path, len);
+ snprintf (file_path + strlen (file_path), len - strlen (file_path),
+ "/%s", filename);
+}
+
+static void
+check_delete_stale_index_file (xlator_t *this, char *filename)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char filepath[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+ filename, filepath, sizeof (filepath));
+ ret = stat (filepath, &st);
+ if (!ret && st.st_nlink == 1)
+ unlink (filepath);
+}
+
+static int
+index_fill_readdir (fd_t *fd, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ off_t in_case = -1;
+ size_t filled = 0;
+ int count = 0;
+ char entrybuf[sizeof(struct dirent) + 256 + 8];
+ struct dirent *entry = NULL;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!off) {
+ rewinddir (dir);
+ } else {
+ seekdir (dir, off);
+ }
+
+ while (filled <= size) {
+ in_case = telldir (dir);
+
+ if (in_case == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "telldir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+
+ errno = 0;
+ entry = NULL;
+ readdir_r (dir, (struct dirent *)entrybuf, &entry);
+
+ if (!entry) {
+ if (errno == EBADF) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "readdir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+ break;
+ }
+
+ if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-"))) {
+ check_delete_stale_index_file (this, entry->d_name);
+ continue;
+ }
+
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ seekdir (dir, in_case);
+ break;
+ }
+
+ this_entry = gf_dirent_for_name (entry->d_name);
+
+ if (!this_entry) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not create gf_dirent for entry %s: (%s)",
+ entry->d_name, strerror (errno));
+ goto out;
+ }
+ this_entry->d_off = telldir (dir);
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count ++;
+ }
+
+ if ((!readdir (dir) && (errno == 0)))
+ /* Indicate EOF */
+ errno = ENOENT;
+out:
+ return count;
+}
+
+int
+index_add (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+ int32_t op_errno = 0;
+ char gfid_path[PATH_MAX] = {0};
+ char index_path[PATH_MAX] = {0};
+ int ret = 0;
+ uuid_t index = {0};
+ index_priv_t *priv = NULL;
+ struct stat st = {0};
+ int fd = 0;
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+ out, op_errno, EINVAL);
+
+ make_gfid_path (priv->index_basepath, subdir, gfid,
+ gfid_path, sizeof (gfid_path));
+
+ ret = stat (gfid_path, &st);
+ if (!ret)
+ goto out;
+ index_get_index (priv, index);
+ make_index_path (priv->index_basepath, subdir,
+ index, index_path, sizeof (index_path));
+ ret = sys_link (index_path, gfid_path);
+ if (!ret || (errno == EEXIST)) {
+ ret = 0;
+ goto out;
+ }
+
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ ret = index_dir_create (this, subdir);
+ if (ret)
+ goto out;
+ } else if (op_errno == EMLINK) {
+ index_generate_index (priv, index);
+ make_index_path (priv->index_basepath, subdir,
+ index, index_path, sizeof (index_path));
+ } else {
+ goto out;
+ }
+
+ fd = creat (index_path, 0);
+ if ((fd < 0) && (errno != EEXIST)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+ "create index (%s)", uuid_utoa (gfid),
+ strerror (errno));
+ goto out;
+ }
+
+ if (fd >= 0)
+ close (fd);
+
+ ret = sys_link (index_path, gfid_path);
+ if (ret && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+ "add to index (%s)", uuid_utoa (gfid),
+ strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+index_del (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+ int32_t op_errno __attribute__((unused)) = 0;
+ index_priv_t *priv = NULL;
+ int ret = 0;
+ char gfid_path[PATH_MAX] = {0};
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+ out, op_errno, EINVAL);
+ make_gfid_path (priv->index_basepath, subdir, gfid,
+ gfid_path, sizeof (gfid_path));
+ ret = unlink (gfid_path);
+ if (ret && (errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to delete from index (%s)",
+ gfid_path, strerror (errno));
+ ret = -errno;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+_check_key_is_zero_filled (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ if (mem_0filled ((const char*)v->data, v->len)) {
+ /* -1 means, no more iterations, treat as 'break' */
+ return -1;
+ }
+ return 0;
+}
+
+void
+_index_action (xlator_t *this, inode_t *inode, gf_boolean_t zero_xattr)
+{
+ int ret = 0;
+ index_inode_ctx_t *ctx = NULL;
+
+ ret = index_inode_ctx_get (inode, this, &ctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index",
+ zero_xattr?"del":"add", uuid_utoa (inode->gfid));
+ goto out;
+ }
+ if (zero_xattr) {
+ if (ctx->state == NOTIN)
+ goto out;
+ ret = index_del (this, inode->gfid, XATTROP_SUBDIR);
+ if (!ret)
+ ctx->state = NOTIN;
+ } else {
+ if (ctx->state == IN)
+ goto out;
+ ret = index_add (this, inode->gfid, XATTROP_SUBDIR);
+ if (!ret)
+ ctx->state = IN;
+ }
+out:
+ return;
+}
+
+void
+_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ gf_boolean_t zero_xattr = _gf_true;
+ int ret = 0;
+
+ ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL);
+ if (ret == -1)
+ zero_xattr = _gf_false;
+ _index_action (this, inode, zero_xattr);
+ return;
+}
+
+void
+fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ _xattrop_index_action (this, inode, xattr);
+}
+
+void
+fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ _xattrop_index_action (this, inode, xattr);
+}
+
+inline gf_boolean_t
+index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ return (flags == GF_XATTROP_ADD_ARRAY);
+}
+
+inline gf_boolean_t
+index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ return (flags == GF_XATTROP_ADD_ARRAY);
+}
+
+int
+__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t tmpctx = 0;
+ char index_dir[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = __fd_ctx_get (fd, this, &tmpctx);
+ if (!ret) {
+ fctx = (index_fd_ctx_t*) (long) tmpctx;
+ goto out;
+ }
+
+ fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t);
+ if (!fctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ index_dir, sizeof (index_dir));
+ fctx->dir = opendir (index_dir);
+ if (!fctx->dir) {
+ ret = -errno;
+ GF_FREE (fctx);
+ fctx = NULL;
+ goto out;
+ }
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx);
+ if (ret) {
+ GF_FREE (fctx);
+ fctx = NULL;
+ ret = -EINVAL;
+ goto out;
+ }
+out:
+ if (fctx)
+ *ctx = fctx;
+ return ret;
+}
+
+int
+index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ LOCK (&fd->lock);
+ {
+ ret = __index_fd_ctx_get (fd, this, ctx);
+ }
+ UNLOCK (&fd->lock);
+ return ret;
+}
+
+//new - Not NULL means start a fop
+//new - NULL means done processing the fop
+void
+index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new)
+{
+ call_stub_t *stub = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, &ctx);
+ if (ret)
+ goto unlock;
+
+ if (new) {
+ __index_enqueue (&ctx->callstubs, new);
+ new = NULL;
+ } else {
+ ctx->processing = _gf_false;
+ }
+
+ if (!ctx->processing) {
+ stub = __index_dequeue (&ctx->callstubs);
+ if (stub)
+ ctx->processing = _gf_true;
+ else
+ ctx->processing = _gf_false;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ if (ret && new) {
+ frame = new->frame;
+ if (new->fop == GF_FOP_XATTROP) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ } else if (new->fop == GF_FOP_FXATTROP) {
+ INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ }
+ call_stub_destroy (new);
+ } else if (stub) {
+ call_resume (stub);
+ }
+ return;
+}
+
+int32_t
+index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+ fop_xattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+int32_t
+index_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+
+ fop_fxattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+int
+index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ //In wind phase bring the gfid into index. This way if the brick crashes
+ //just after posix performs xattrop before _cbk reaches index xlator
+ //we will still have the gfid in index.
+ _index_action (this, frame->local, _gf_false);
+
+ STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr,
+ xdata);
+ return 0;
+}
+
+int
+index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ //In wind phase bring the gfid into index. This way if the brick crashes
+ //just after posix performs xattrop before _cbk reaches index xlator
+ //we will still have the gfid in index.
+ _index_action (this, frame->local, _gf_false);
+ STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr,
+ xdata);
+ return 0;
+}
+
+int32_t
+index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_xattrop_track (loc, flags, dict))
+ goto out;
+
+ frame->local = inode_ref (loc->inode);
+ stub = fop_xattrop_stub (frame, index_xattrop_wrapper,
+ loc, flags, dict, xdata);
+ if (!stub) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ index_queue_process (this, loc->inode, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
+}
+
+int32_t
+index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_fxattrop_track (fd, flags, dict))
+ goto out;
+
+ frame->local = inode_ref (fd->inode);
+ stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper,
+ fd, flags, dict, xdata);
+ if (!stub) {
+ INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, xdata);
+ return 0;
+ }
+
+ index_queue_process (this, fd->inode, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
+}
+
+uint64_t
+index_entry_count (xlator_t *this, char *subdir)
+{
+ index_priv_t *priv = NULL;
+ char index_dir[PATH_MAX];
+ DIR *dirp = NULL;
+ uint64_t count = 0;
+ struct dirent buf;
+ struct dirent *entry = NULL;
+
+ priv = this->private;
+
+ make_index_dir_path (priv->index_basepath, subdir,
+ index_dir, sizeof (index_dir));
+
+ dirp = opendir (index_dir);
+ if (!dirp)
+ return 0;
+
+ while (readdir_r (dirp, &buf, &entry) == 0) {
+ if (!entry)
+ break;
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+ if (!strncmp (entry->d_name, subdir, strlen (subdir)))
+ continue;
+ count++;
+ }
+ closedir (dirp);
+
+ return count;
+}
+
+
+int32_t
+index_getxattr_wrapper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ uint64_t count = 0;
+
+ priv = this->private;
+
+ xattr = dict_new ();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (strcmp (name, GF_XATTROP_INDEX_GFID) == 0) {
+ ret = dict_set_static_bin (xattr, (char*)name, priv->xattrop_vgfid,
+ sizeof (priv->xattrop_vgfid));
+ if (ret) {
+ ret = -ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "xattrop index "
+ "gfid set failed");
+ goto done;
+ }
+ } else if (strcmp (name, GF_XATTROP_INDEX_COUNT) == 0) {
+ count = index_entry_count (this, XATTROP_SUBDIR);
+
+ ret = dict_set_uint64 (xattr, (char *)name, count);
+ if (ret) {
+ ret = -ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "xattrop index "
+ "count set failed");
+ goto done;
+ }
+ }
+done:
+ if (ret)
+ STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata);
+ else
+ STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+}
+
+int32_t
+index_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ index_priv_t *priv = NULL;
+ struct stat lstatbuf = {0};
+ int ret = 0;
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ char path[PATH_MAX] = {0};
+ struct iatt stbuf = {0, };
+ struct iatt postparent = {0,};
+ dict_t *xattr = NULL;
+ gf_boolean_t is_dir = _gf_false;
+
+ priv = this->private;
+
+ VALIDATE_OR_GOTO (loc, done);
+ if (!uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ path, sizeof (path));
+ is_dir = _gf_true;
+ } else if (!uuid_compare (loc->pargfid, priv->xattrop_vgfid)) {
+ make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+ loc->name, path, sizeof (path));
+ }
+
+ ret = lstat (path, &lstatbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir "
+ "(%s)", strerror (errno));
+ op_errno = errno;
+ goto done;
+ } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, "
+ "not a directory");
+ op_errno = ENOENT;
+ goto done;
+ }
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto done;
+ }
+
+ iatt_from_stat (&stbuf, &lstatbuf);
+ if (is_dir)
+ uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid);
+ else
+ uuid_generate (stbuf.ia_gfid);
+ stbuf.ia_ino = -1;
+ op_ret = 0;
+done:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+ loc->inode, &stbuf, xattr, &postparent);
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+}
+
+int32_t
+index_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ index_fd_ctx_t *fctx = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int count = 0;
+ gf_dirent_t entries;
+
+ INIT_LIST_HEAD (&entries.list);
+
+ ret = index_fd_ctx_get (fd, this, &fctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL, fd=%p", fd);
+ op_errno = -ret;
+ goto done;
+ }
+
+ dir = fctx->dir;
+
+ if (!dir) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dir is NULL for fd=%p", fd);
+ op_errno = EINVAL;
+ goto done;
+ }
+
+ count = index_fill_readdir (fd, dir, off, size, &entries);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+done:
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+int
+index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ int ret = 0;
+ struct iatt preparent = {0};
+ struct iatt postparent = {0};
+ char index_dir[PATH_MAX] = {0};
+ struct stat lstatbuf = {0};
+ uuid_t gfid = {0};
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ index_dir, sizeof (index_dir));
+ ret = lstat (index_dir, &lstatbuf);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto done;
+ }
+
+ iatt_from_stat (&preparent, &lstatbuf);
+ uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid);
+ preparent.ia_ino = -1;
+ uuid_parse (loc->name, gfid);
+ ret = index_del (this, gfid, XATTROP_SUBDIR);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+ memset (&lstatbuf, 0, sizeof (lstatbuf));
+ ret = lstat (index_dir, &lstatbuf);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto done;
+ }
+ iatt_from_stat (&postparent, &lstatbuf);
+ uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid);
+ postparent.ia_ino = -1;
+done:
+ INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent,
+ &postparent, xdata);
+ return 0;
+}
+
+int32_t
+index_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!name || (strcmp (GF_XATTROP_INDEX_GFID, name) &&
+ strcmp (GF_XATTROP_INDEX_COUNT, name)))
+ goto out;
+
+ stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+index_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (uuid_compare (loc->gfid, priv->xattrop_vgfid) &&
+ uuid_compare (loc->pargfid, priv->xattrop_vgfid))
+ goto normal;
+
+ stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+normal:
+ STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ return 0;
+}
+
+int32_t
+index_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid))
+ goto out;
+ stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size, off,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
+}
+
+int
+index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (loc->pargfid, priv->xattrop_vgfid))
+ goto out;
+
+ stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_index_mt_end + 1);
+
+ return ret;
+}
+
+int
+init (xlator_t *this)
+{
+ int ret = -1;
+ index_priv_t *priv = NULL;
+ pthread_t thread;
+ pthread_attr_t w_attr;
+ gf_boolean_t mutex_inited = _gf_false;
+ gf_boolean_t cond_inited = _gf_false;
+ gf_boolean_t attr_inited = _gf_false;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'index' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ LOCK_INIT (&priv->lock);
+ if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_cond_init failed (%d)", ret);
+ goto out;
+ }
+ cond_inited = _gf_true;
+
+ if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_mutex_init failed (%d)", ret);
+ goto out;
+ }
+ mutex_inited = _gf_true;
+
+ if ((ret = pthread_attr_init (&w_attr)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_attr_init failed (%d)", ret);
+ goto out;
+ }
+ attr_inited = _gf_true;
+
+ ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE);
+ if (ret == EINVAL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Using default thread stack size");
+ }
+ GF_OPTION_INIT ("index-base", priv->index_basepath, path, out);
+ uuid_generate (priv->index);
+ uuid_generate (priv->xattrop_vgfid);
+ INIT_LIST_HEAD (&priv->callstubs);
+
+ this->private = priv;
+
+ ret = index_dir_create (this, XATTROP_SUBDIR);
+ if (ret < 0)
+ goto out;
+
+ ret = gf_thread_create (&thread, &w_attr, index_worker, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create "
+ "worker thread, aborting");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ if (cond_inited)
+ pthread_cond_destroy (&priv->cond);
+ if (mutex_inited)
+ pthread_mutex_destroy (&priv->mutex);
+ if (priv)
+ GF_FREE (priv);
+ this->private = NULL;
+ }
+ if (attr_inited)
+ pthread_attr_destroy (&w_attr);
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+ this->private = NULL;
+ LOCK_DESTROY (&priv->lock);
+ pthread_cond_destroy (&priv->cond);
+ pthread_mutex_destroy (&priv->mutex);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+index_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_cache = 0;
+ if (!inode_ctx_del (inode, this, &tmp_cache))
+ GF_FREE ((index_inode_ctx_t*) (long)tmp_cache);
+
+ return 0;
+}
+
+int32_t
+index_releasedir (xlator_t *this, fd_t *fd)
+{
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (index_fd_ctx_t*) (long) ctx;
+ if (fctx->dir)
+ closedir (fctx->dir);
+
+ GF_FREE (fctx);
+out:
+ return 0;
+}
+
+int32_t
+index_release (xlator_t *this, fd_t *fd)
+{
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (index_fd_ctx_t*) (long) ctx;
+ GF_FREE (fctx);
+out:
+ return 0;
+}
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ ret = default_notify (this, event, data);
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .xattrop = index_xattrop,
+ .fxattrop = index_fxattrop,
+
+ //interface functions follow
+ .getxattr = index_getxattr,
+ .lookup = index_lookup,
+ .readdir = index_readdir,
+ .unlink = index_unlink
+};
+
+struct xlator_dumpops dumpops;
+
+struct xlator_cbks cbks = {
+ .forget = index_forget,
+ .release = index_release,
+ .releasedir = index_releasedir
+};
+
+struct volume_options options[] = {
+ { .key = {"index-base" },
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "path where the index files need to be stored",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
new file mode 100644
index 000000000..661dcdbc4
--- /dev/null
+++ b/xlators/features/index/src/index.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INDEX_H__
+#define __INDEX_H__
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "index-mem-types.h"
+
+#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024))
+
+typedef enum {
+ UNKNOWN,
+ IN,
+ NOTIN
+} index_state_t;
+
+typedef struct index_inode_ctx {
+ gf_boolean_t processing;
+ struct list_head callstubs;
+ index_state_t state;
+} index_inode_ctx_t;
+
+typedef struct index_fd_ctx {
+ DIR *dir;
+} index_fd_ctx_t;
+
+typedef struct index_priv {
+ char *index_basepath;
+ uuid_t index;
+ gf_lock_t lock;
+ uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir
+ struct list_head callstubs;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+} index_priv_t;
+
+#define INDEX_STACK_UNWIND(fop, frame, params ...) \
+do { \
+ if (frame) { \
+ inode_t *_inode = frame->local; \
+ frame->local = NULL; \
+ inode_unref (_inode); \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+} while (0)
+
+#endif
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index 53dd3aa5d..0f79731b4 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,15 +1,18 @@
xlator_LTLIBRARIES = locks.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module -avoid-version
-locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c
-locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
+ clear.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h locks-mem-types.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +20,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
- ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
new file mode 100644
index 000000000..75593b898
--- /dev/null
+++ b/xlators/features/locks/src/clear.c
@@ -0,0 +1,423 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+#include "statedump.h"
+#include "clear.h"
+
+int
+clrlk_get_kind (char *kind)
+{
+ char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted",
+ "all"};
+ int ret_kind = CLRLK_KIND_MAX;
+ int i = 0;
+
+ for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) {
+ if (!strcmp (clrlk_kinds[i], kind)) {
+ ret_kind = i;
+ break;
+ }
+ }
+
+ return ret_kind;
+}
+
+int
+clrlk_get_type (char *type)
+{
+ char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
+ int ret_type = CLRLK_TYPE_MAX;
+ int i = 0;
+
+ for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) {
+ if (!strcmp (clrlk_types[i], type)) {
+ ret_type = i;
+ break;
+ }
+ }
+
+ return ret_type;
+}
+
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range)
+{
+ int ret = -1;
+
+ if (!chk_range)
+ goto out;
+
+ if (!range_str) {
+ ret = 0;
+ *chk_range = _gf_false;
+ goto out;
+ }
+
+ if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence,
+ &ulock->l_start, &ulock->l_len) != 3) {
+ goto out;
+ }
+
+ ret = 0;
+ *chk_range = _gf_true;
+out:
+ return ret;
+}
+
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args)
+{
+ char *opts = NULL;
+ char *cur = NULL;
+ char *tok = NULL;
+ char *sptr = NULL;
+ char *free_ptr = NULL;
+ char kw[KW_MAX] = {[KW_TYPE] = 't',
+ [KW_KIND] = 'k',
+ };
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (cmd);
+ free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char);
+ if (!opts)
+ goto out;
+
+ if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+ cur = opts;
+ for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
+ cur = NULL, i++) {
+ if (tok[0] != kw[i]) {
+ ret = -1;
+ goto out;
+ }
+ if (i == KW_TYPE)
+ args->type = clrlk_get_type (tok+1);
+ if (i == KW_KIND)
+ args->kind = clrlk_get_kind (tok+1);
+ }
+
+ if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+ goto out;
+
+ /*optional args, neither range nor basename can 'legally' contain
+ * "/" in them*/
+ tok = strtok_r (NULL, "/", &sptr);
+ if (tok)
+ args->opts = gf_strdup (tok);
+
+ ret = 0;
+out:
+ GF_FREE (free_ptr);
+ return ret;
+}
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno)
+{
+ posix_lock_t *plock = NULL;
+ posix_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
+ list) {
+ if ((plock->blocked &&
+ !(args->kind & CLRLK_BLOCKED)) ||
+ (!plock->blocked &&
+ !(args->kind & CLRLK_GRANTED)))
+ continue;
+
+ if (chk_range &&
+ (plock->user_flock.l_whence != ulock.l_whence
+ || plock->user_flock.l_start != ulock.l_start
+ || plock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ list_del_init (&plock->list);
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out (this, plock->frame, NULL, NULL,
+ F_SETLKW, &plock->user_flock,
+ -1, EAGAIN, NULL);
+
+ STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
+ &plock->user_flock, NULL);
+
+ } else {
+ gcount++;
+ }
+ GF_FREE (plock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_locks (this, pl_inode);
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init (&ilock->blocked_locks);
+ list_add (&ilock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
+ list_del_init (&ilock->blocked_locks);
+ pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN,
+ ilock->volume);
+ STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
+ EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
+ list) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init (&ilock->list);
+ list_add (&ilock->list, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, list) {
+ list_del_init (&ilock->list);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ ret = 0;
+out:
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head removed;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init (&elock->blocked_locks);
+ list_add_tail (&elock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
+ list_del_init (&elock->blocked_locks);
+ entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type,
+ -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
+
+ __pl_entrylk_unref (elock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ INIT_LIST_HEAD (&removed);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ gcount++;
+ list_del_init (&elock->domain_list);
+ list_add_tail (&elock->domain_list, &removed);
+
+ __pl_entrylk_unref (elock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_entry_locks (this, pl_inode, dom);
+
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno)
+{
+ pl_dom_list_t *dom = NULL;
+ int ret = -1;
+ int tmp_bcount = 0;
+ int tmp_gcount = 0;
+
+ if (list_empty (&pl_inode->dom_list)) {
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ tmp_bcount = tmp_gcount = 0;
+
+ switch (args->type)
+ {
+ case CLRLK_INODE:
+ ret = clrlk_clear_inodelk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ case CLRLK_ENTRY:
+ ret = clrlk_clear_entrylk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ }
+
+ *blkd += tmp_bcount;
+ *granted += tmp_gcount;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
new file mode 100644
index 000000000..511f3f74a
--- /dev/null
+++ b/xlators/features/locks/src/clear.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLEAR_H__
+#define __CLEAR_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "stack.h"
+#include "call-stub.h"
+#include "locks.h"
+
+typedef enum {
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
+} clrlk_type;
+
+typedef enum {
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
+} clrlk_kind;
+
+typedef enum {
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
+} clrlk_opts;
+
+struct _clrlk_args;
+typedef struct _clrlk_args clrlk_args;
+
+struct _clrlk_args {
+ int type;
+ int kind;
+ char *opts;
+};
+
+int
+clrlk_get__kind (char *kind);
+int
+clrlk_get_type (char *type);
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range);
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args);
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno);
+#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index f6b9c3315..f6c71c1cf 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -45,8 +35,9 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
static int
pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
posix_lock_t *old_lock);
+
static pl_dom_list_t *
-allocate_domain (const char *volume)
+__allocate_domain (const char *volume)
{
pl_dom_list_t *dom = NULL;
@@ -85,24 +76,26 @@ get_domain (pl_inode_t *pl_inode, const char *volume)
{
pl_dom_list_t *dom = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, pl_inode, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, volume, out);
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (strcmp (dom->domain, volume) == 0)
- goto found;
+ GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", volume, out);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (strcmp (dom->domain, volume) == 0)
+ goto unlock;
+ }
+ dom = __allocate_domain (volume);
+ if (dom)
+ list_add (&dom->inode_list, &pl_inode->dom_list);
}
-
- dom = allocate_domain (volume);
- if (dom)
- list_add (&dom->inode_list, &pl_inode->dom_list);
-found:
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
if (dom) {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s found", volume);
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
} else {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s not found", volume);
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
}
out:
return dom;
@@ -143,10 +136,10 @@ __pl_inode_is_empty (pl_inode_t *pl_inode)
void
pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
{
- snprintf (str, size, "Pid=%llu, lk-owner=%llu, Transport=%p, Frame=%llu",
+ snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
(unsigned long long) frame->root->pid,
- (unsigned long long) frame->root->lk_owner,
- (void *)frame->root->trans,
+ lkowner_utoa (&frame->root->lk_owner),
+ frame->root->client,
(unsigned long long) frame->root->unique);
}
@@ -180,14 +173,13 @@ pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
uuid_utoa (inode->gfid), fd,
ipath ? ipath : "<nul>");
- if (ipath)
- GF_FREE (ipath);
+ GF_FREE (ipath);
}
void
pl_print_lock (char *str, int size, int cmd,
- struct gf_flock *flock, uint64_t owner)
+ struct gf_flock *flock, gf_lkowner_t *owner)
{
char *cmd_str = NULL;
char *type_str = NULL;
@@ -235,11 +227,11 @@ pl_print_lock (char *str, int size, int cmd,
}
snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%llu",
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
cmd_str, type_str, (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner);
+ lkowner_utoa (owner));
}
@@ -262,7 +254,7 @@ pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
gf_log (this->name, GF_LOG_INFO,
"[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
@@ -312,7 +304,7 @@ pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
pl_print_verdict (verdict, 32, op_ret, op_errno);
@@ -342,7 +334,7 @@ pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
if (domain)
pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
else
- pl_print_lock (pl_lock, 256, cmd, flock, frame->root->lk_owner);
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
gf_log (this->name, GF_LOG_INFO,
"[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
@@ -435,46 +427,50 @@ pl_inode_get (xlator_t *this, inode_t *inode)
pl_inode_t *pl_inode = NULL;
int ret = 0;
- ret = inode_ctx_get (inode, this,&tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto out;
- }
- pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
- gf_locks_mt_pl_inode_t);
- if (!pl_inode) {
- goto out;
- }
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
+ pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
+ gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "Allocating new pl inode");
+ gf_log (this->name, GF_LOG_TRACE,
+ "Allocating new pl inode");
- pthread_mutex_init (&pl_inode->mutex, NULL);
+ pthread_mutex_init (&pl_inode->mutex, NULL);
- INIT_LIST_HEAD (&pl_inode->dom_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
- INIT_LIST_HEAD (&pl_inode->reservelk_list);
- INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
- INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ INIT_LIST_HEAD (&pl_inode->dom_list);
+ INIT_LIST_HEAD (&pl_inode->ext_list);
+ INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
- inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ }
+unlock:
+ UNLOCK (&inode->lock);
-out:
return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd)
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd)
{
posix_lock_t *lock = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, flock, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, transport, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fd, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fd, out);
lock = GF_CALLOC (1, sizeof (posix_lock_t),
gf_locks_mt_posix_lock_t);
@@ -490,11 +486,11 @@ new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
else
lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->transport = transport;
+ lock->client = client;
lock->fd_num = fd_to_fdnum (fd);
lock->fd = fd;
lock->client_pid = client_pid;
- lock->owner = owner;
+ lock->owner = *owner;
INIT_LIST_HEAD (&lock->list);
@@ -569,8 +565,8 @@ int
same_owner (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
@@ -699,7 +695,7 @@ subtract_locks (posix_lock_t *big, posix_lock_t *small)
}
GF_ASSERT (0);
- gf_log (POSIX_LOCKS, GF_LOG_ERROR, "Unexpected case in subtract_locks");
+ gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
out:
if (v.locks[0]) {
@@ -719,6 +715,36 @@ done:
return v;
}
+static posix_lock_t *
+first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *conf = NULL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+
+ if (locks_overlap (l, lock)) {
+ if (same_owner (l, lock))
+ continue;
+
+ if ((l->fl_type == F_WRLCK) ||
+ (lock->fl_type == F_WRLCK)) {
+ conf = l;
+ goto unlock;
+ }
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return conf;
+}
+
/*
Start searching from {begin}, and return the first lock that
conflicts, NULL if no conflict
@@ -777,6 +803,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
struct _values v = { .locks = {0, 0, 0} };
list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
+ if (conf->blocked)
+ continue;
if (!locks_overlap (conf, lock))
continue;
@@ -785,7 +813,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum = add_locks (lock, conf);
sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
+ sum->client = lock->client;
sum->fd_num = lock->fd_num;
sum->client_pid = lock->client_pid;
sum->owner = lock->owner;
@@ -794,6 +822,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
__destroy_lock (conf);
__destroy_lock (lock);
+ INIT_LIST_HEAD (&sum->list);
+ posix_lock_to_flock (sum, &sum->user_flock);
__insert_and_merge (pl_inode, sum);
return;
@@ -801,7 +831,7 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
sum = add_locks (lock, conf);
sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
+ sum->client = conf->client;
sum->fd_num = conf->fd_num;
sum->client_pid = conf->client_pid;
sum->owner = conf->owner;
@@ -821,6 +851,8 @@ __insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
continue;
INIT_LIST_HEAD (&v.locks[i]->list);
+ posix_lock_to_flock (v.locks[i],
+ &v.locks[i]->user_flock);
__insert_and_merge (pl_inode,
v.locks[i]);
}
@@ -889,10 +921,9 @@ __grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *g
posix_lock_to_flock (l, &conf->user_flock);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->owner,
+ l->client_pid, lkowner_utoa (&l->owner),
l->user_flock.l_start,
l->user_flock.l_len);
@@ -928,7 +959,8 @@ grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, NULL);
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
GF_FREE (lock);
}
@@ -957,8 +989,8 @@ pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
flock.l_len = old_lock->user_flock.l_len;
- unlock_lock = new_posix_lock (&flock, old_lock->transport,
- old_lock->client_pid, old_lock->owner,
+ unlock_lock = new_posix_lock (&flock, old_lock->client,
+ old_lock->client_pid, &old_lock->owner,
old_lock->fd);
GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
ret = 0;
@@ -973,7 +1005,8 @@ pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, NULL);
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
GF_FREE (lock);
}
@@ -1011,19 +1044,19 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
if (__is_lock_grantable (pl_inode, lock)) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
__insert_and_merge (pl_inode, lock);
} else if (can_block) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
lock->blocked = 1;
@@ -1031,10 +1064,10 @@ pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
ret = -1;
} else {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
errno = EAGAIN;
@@ -1056,7 +1089,7 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
{
posix_lock_t *conf = NULL;
- conf = first_overlap (pl_inode, lock);
+ conf = first_conflicting_overlap (pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
@@ -1065,3 +1098,4 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
return conf;
}
+
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index d95e287cb..5ec630ee8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,29 +1,41 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
+#include "lkowner.h"
+/*dump locks format strings */
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+
+
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, fd_t *fd);
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
@@ -55,24 +67,29 @@ pl_dom_list_t *
get_domain (pl_inode_t *pl_inode, const char *volume);
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom);
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom);
void
__delete_inode_lock (pl_inode_lock_t *lock);
void
-__destroy_inode_lock (pl_inode_lock_t *lock);
+__pl_inodelk_unref (pl_inode_lock_t *lock);
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+ pl_dom_list_t *dom);
void pl_update_refkeeper (xlator_t *this, inode_t *inode);
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode);
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname);
int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode);
+int32_t
get_entrylk_count (xlator_t *this, inode_t *inode);
void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
@@ -131,4 +148,11 @@ pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
posix_lock_t *lock, int can_block);
int
pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+
+void __pl_inodelk_unref (pl_inode_lock_t *lock);
+void __pl_entrylk_unref (pl_entry_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 886a30b0e..ea6995627 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -33,10 +23,29 @@
#include "locks.h"
#include "common.h"
+
+void
+__pl_entrylk_unref (pl_entry_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+}
+
+
+static void
+__pl_entrylk_ref (pl_entry_lock_t *lock)
+{
+ lock->ref++;
+}
+
+
static pl_entry_lock_t *
new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- void *trans, pid_t client_pid, uint64_t owner, const char *volume)
-
+ const char *domain, call_frame_t *frame, char *conn_id)
{
pl_entry_lock_t *newlock = NULL;
@@ -46,16 +55,23 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
goto out;
}
- newlock->basename = basename ? gf_strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
- newlock->volume = volume;
- newlock->client_pid = client_pid;
- newlock->owner = owner;
+ newlock->basename = basename ? gf_strdup (basename) : NULL;
+ newlock->type = type;
+ newlock->client = frame->root->client;
+ newlock->client_pid = frame->root->pid;
+ newlock->volume = domain;
+ newlock->owner = frame->root->lk_owner;
+ newlock->frame = frame;
+
+ if (conn_id) {
+ newlock->connection_id = gf_strdup (conn_id);
+ }
INIT_LIST_HEAD (&newlock->domain_list);
INIT_LIST_HEAD (&newlock->blocked_locks);
+ INIT_LIST_HEAD (&newlock->client_list);
+ __pl_entrylk_ref (newlock);
out:
return newlock;
}
@@ -81,47 +97,47 @@ names_conflict (const char *n1, const char *n2)
}
-static int
+static inline int
__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->trans == l2->trans));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/**
- * lock_grantable - is this lock grantable?
+ * entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
* @basename: name we're trying to lock
* @type: type of lock
*/
static pl_entry_lock_t *
-__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
if (list_empty (&dom->entrylk_list))
return NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (names_conflict (lock->basename, basename))
- return lock;
+ list_for_each_entry (tmp, &dom->entrylk_list, domain_list) {
+ if (names_conflict (tmp->basename, lock->basename))
+ return tmp;
}
return NULL;
}
static pl_entry_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
if (list_empty (&dom->blocked_entrylks))
return NULL;
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (names_conflict (lock->basename, basename))
+ list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) {
+ if (names_conflict (tmp->basename, lock->basename))
return lock;
}
@@ -302,7 +318,7 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
}
/**
- * __lock_name - lock a name in a directory
+ * __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
* @basename: name of the entry to lock
* if null, lock the entire directory
@@ -313,90 +329,48 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
*/
int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock)
+__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
+ int nonblock, pl_dom_list_t *dom)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
- void *trans = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
-
- int ret = -EINVAL;
-
- trans = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
-
- lock = new_entrylk_lock (pinode, basename, type, trans, client_pid, owner, dom->domain);
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EAGAIN;
- lock->frame = frame;
- lock->this = this;
- lock->trans = trans;
-
- conf = __lock_grantable (dom, basename, type);
+ conf = __entrylk_grantable (dom, lock);
if (conf) {
ret = -EAGAIN;
- if (nonblock){
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
+ if (nonblock)
goto out;
- }
-
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
gf_log (this->name, GF_LOG_TRACE,
"Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
+ pinode, lock->basename);
goto out;
}
- if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
+ if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
ret = -EAGAIN;
- if (nonblock) {
- if (lock->basename)
- GF_FREE ((char *) lock->basename);
- GF_FREE (lock);
+ if (nonblock)
goto out;
- }
- lock->frame = frame;
- lock->this = this;
-
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock is grantable, but blocking to prevent starvation");
gf_log (this->name, GF_LOG_TRACE,
"Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
+ pinode, lock->basename);
- ret = -EAGAIN;
goto out;
}
- switch (type) {
- case ENTRYLK_WRLCK:
- gettimeofday (&lock->granted_time, NULL);
- list_add_tail (&lock->domain_list, &dom->entrylk_list);
- break;
-
- default:
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Invalid type for entrylk specified: %d", type);
- ret = -EINVAL;
- goto out;
- }
+ __pl_entrylk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->domain_list, &dom->entrylk_list);
ret = 0;
out:
@@ -404,37 +378,36 @@ out:
}
/**
- * __unlock_name - unlock a name in a directory
+ * __unlock_entrylk - unlock a name in a directory
* @inode: inode for the directory to unlock in
* @basename: name of the entry to unlock
* if null, unlock the entire directory
*/
pl_entry_lock_t *
-__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
pl_entry_lock_t *ret_lock = NULL;
- lock = __find_most_matching_lock (dom, basename);
+ tmp = __find_most_matching_lock (dom, lock->basename);
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
+ if (!tmp) {
+ gf_log ("locks", GF_LOG_ERROR,
"unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
- basename);
+ lock->basename);
goto out;
}
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
+ if (names_equal (tmp->basename, lock->basename)
+ && tmp->type == lock->type) {
+
+ list_del_init (&tmp->domain_list);
+ ret_lock = tmp;
- if (type == ENTRYLK_WRLCK) {
- list_del_init (&lock->domain_list);
- ret_lock = lock;
- }
} else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
+ gf_log ("locks", GF_LOG_ERROR,
+ "Unlock on %s for a non-existing lock!", lock->basename);
goto out;
}
@@ -442,6 +415,32 @@ out:
return ret_lock;
}
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+{
+ uint32_t entrylk = 0;
+ pl_inode_t *pinode = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
+
+ pinode = pl_inode_get (this, parent);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry (dom, &pinode->dom_list, inode_list) {
+ conf = __find_most_matching_lock (dom, basename);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ return entrylk;
+}
void
__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
@@ -456,27 +455,14 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
INIT_LIST_HEAD (&blocked_list);
list_splice_init (&dom->blocked_entrylks, &blocked_list);
- list_for_each_entry_safe (bl, tmp, &blocked_list,
- blocked_locks) {
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
list_del_init (&bl->blocked_locks);
-
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
-
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, dom, bl->this, 0);
+ bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom);
if (bl_ret == 0) {
list_add (&bl->blocked_locks, granted);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "should never happen");
- if (bl->basename)
- GF_FREE ((char *)bl->basename);
- GF_FREE (bl);
}
}
return;
@@ -485,7 +471,7 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grants locks if possible which are blocked on a lock */
void
grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
+ pl_dom_list_t *dom)
{
struct list_head granted_list;
pl_entry_lock_t *tmp = NULL;
@@ -495,124 +481,56 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_lock (&pl_inode->mutex);
{
- __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list);
+ __grant_blocked_entry_locks (this, pl_inode, dom,
+ &granted_list);
}
pthread_mutex_unlock (&pl_inode->mutex);
list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
lock->basename, ENTRYLK_LOCK, lock->type,
0, 0);
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
+ }
- }
-
- GF_FREE ((char *)unlocked->basename);
- GF_FREE (unlocked);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_entrylk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
return;
}
-/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
- */
-
-static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- pl_dom_list_t *dom, void *trans)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *tmp = NULL;
- struct list_head granted;
- struct list_head released;
-
- INIT_LIST_HEAD (&granted);
- INIT_LIST_HEAD (&released);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
- blocked_locks) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->blocked_locks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- list_add (&lock->blocked_locks, &released);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
- domain_list) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->domain_list);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- }
-
- __grant_blocked_entry_locks (this, pinode, dom, &granted);
-
- }
-
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- }
-
- return 0;
-}
/* Common entrylk code called by pl_entrylk and pl_fentrylk */
int
pl_common_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, const char *basename,
- entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd)
-{
- uint64_t owner = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- void * transport = NULL;
+ entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
- pl_inode_t * pinode = NULL;
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
- pl_dom_list_t *dom = NULL;
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
pinode = pl_inode_get (this, inode);
if (!pinode) {
@@ -620,6 +538,15 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+ }
+
dom = get_domain (pinode, volume);
if (!dom){
op_errno = ENOMEM;
@@ -628,74 +555,69 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
- owner = frame->root->lk_owner;
- transport = frame->root->trans;
-
- if (owner == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, dom, transport);
- op_ret = 0;
-
- goto out;
+ reqlock = new_entrylk_lock (pinode, basename, type, dom->domain, frame,
+ conn_id);
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
}
switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- op_errno = -ret;
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- else
- unwind = 1;
- goto out;
- } else {
- op_ret = 0;
- op_errno = 0;
- unwind = 1;
- goto out;
- }
-
- break;
-
case ENTRYLK_LOCK_NB:
- unwind = 1;
+ nonblock = 1;
+ /* fall through */
+ case ENTRYLK_LOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 1);
+ reqlock->pinode = pinode;
+
+ ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom);
+ if (ret == 0) {
+ reqlock->frame = NULL;
+ op_ret = 0;
+ } else {
+ op_errno = -ret;
+ }
+
+ if (ctx && (!ret || !nonblock))
+ list_add (&reqlock->client_list,
+ &ctx->entrylk_lockers);
+
+ if (ret == -EAGAIN && !nonblock) {
+ /* blocked */
+ unwind = 0;
+ } else {
+ __pl_entrylk_unref (reqlock);
+ }
}
pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- break;
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
+ break;
case ENTRYLK_UNLOCK:
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
- unlocked = __unlock_name (dom, basename, type);
+ unlocked = __unlock_entrylk (dom, reqlock);
+ if (unlocked) {
+ list_del_init (&unlocked->client_list);
+ __pl_entrylk_unref (unlocked);
+ op_ret = 0;
+ } else {
+ op_errno = EINVAL;
+ }
+ __pl_entrylk_unref (reqlock);
}
pthread_mutex_unlock (&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked, dom);
+ grant_blocked_entry_locks (this, pinode, dom);
break;
@@ -705,21 +627,19 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
"a bug report at http://bugs.gluster.com", cmd);
goto out;
}
-
- op_ret = 0;
out:
pl_update_refkeeper (this, inode);
+
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
cmd, type, op_ret, op_errno);
-
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+unwind:
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
} else {
entrylk_trace_block (this, frame, volume, fd, loc, basename,
cmd, type);
}
-
return 0;
}
@@ -732,10 +652,10 @@ out:
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
-
- pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL);
+ pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd,
+ type, loc, NULL, xdata);
return 0;
}
@@ -750,16 +670,98 @@ pl_entrylk (call_frame_t *frame, xlator_t *this,
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd,
+ type, NULL, fd, xdata);
+
+ return 0;
+}
+
+
+static void
+pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
+{
+ pl_inode_t *pinode = NULL;
+ char *path = NULL;
+ char *file = NULL;
+
+ pinode = lock->pinode;
+
+ inode_path (pinode->refkeeper, NULL, &path);
+
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (pinode->refkeeper->gfid);
+
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, lock->client, (uint64_t) lock->client_pid,
+ lkowner_utoa (&lock->owner));
+ GF_FREE (path);
+}
+
+
+/* Release all entrylks from this client */
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pinode = NULL;
+
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+
+ pthread_mutex_lock (&ctx->lock);
+ {
+ list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers,
+ client_list) {
+ list_del_init (&l->client_list);
+ list_add_tail (&l->client_list, &released);
+
+ pl_entrylk_log_cleanup (l);
+
+ pinode = l->pinode;
- pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd);
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_del_init (&l->domain_list);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ }
+ }
+ pthread_mutex_unlock (&ctx->lock);
+
+ list_for_each_entry_safe (l, tmp, &released, client_list) {
+ list_del_init (&l->client_list);
+
+ if (l->frame)
+ STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN,
+ NULL);
+
+ pinode = l->pinode;
+
+ dom = get_domain (pinode, l->volume);
+
+ grant_blocked_inode_locks (this, pinode, dom);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ __pl_entrylk_unref (l);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+ }
return 0;
}
-static int32_t
+int32_t
__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
{
int32_t count = 0;
@@ -768,24 +770,10 @@ __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Active",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Blocked",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
count++;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index 94ecfc265..e7093e60e 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -33,20 +23,30 @@
#include "locks.h"
#include "common.h"
-void
+inline void
__delete_inode_lock (pl_inode_lock_t *lock)
{
list_del (&lock->list);
}
+static inline void
+__pl_inodelk_ref (pl_inode_lock_t *lock)
+{
+ lock->ref++;
+}
+
void
-__destroy_inode_lock (pl_inode_lock_t *lock)
+__pl_inodelk_unref (pl_inode_lock_t *lock)
{
- GF_FREE (lock);
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
}
/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
-static int
+static inline int
inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
@@ -120,10 +120,11 @@ inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
}
/* Returns true if the 2 inodelks have the same owner */
-static int same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+static inline int
+same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->owner == l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/* Returns true if the 2 inodelks conflict with each other */
@@ -203,7 +204,7 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
int ret = -EINVAL;
conf = __inodelk_grantable (dom, lock);
- if (conf){
+ if (conf) {
ret = -EAGAIN;
if (can_block == 0)
goto out;
@@ -212,10 +213,10 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
@@ -231,19 +232,20 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
gettimeofday (&lock->blkd_time, NULL);
list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"Lock is grantable, but blocking to prevent starvation");
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
goto out;
}
+ __pl_inodelk_ref (lock);
gettimeofday (&lock->granted_time, NULL);
list_add (&lock->list, &dom->inodelk_list);
@@ -288,21 +290,25 @@ __inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
conf = find_matching_inodelk (lock, dom);
if (!conf) {
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock not found for unlock");
+ gf_log (this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p", (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end,
+ lkowner_utoa (&lock->owner), lock->client);
goto out;
}
__delete_inode_lock (conf);
gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock");
- __destroy_inode_lock (lock);
-
+ " Matching lock found for unlock %llu-%llu, by %s on %p",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
+ lock->client);
out:
return conf;
+}
-}
static void
__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom)
@@ -331,7 +337,8 @@ __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom)
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom)
{
struct list_head granted;
pl_inode_lock_t *lock;
@@ -339,11 +346,6 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *
INIT_LIST_HEAD (&granted);
- if (list_empty (&dom->blocked_inodelks)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No blocked locks to be granted for domain: %s", dom->domain);
- }
-
pthread_mutex_lock (&pl_inode->mutex);
{
__grant_blocked_inode_locks (this, pl_inode, &granted, dom);
@@ -352,170 +354,184 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *
list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
&lock->user_flock, 0, 0, lock->volume);
- STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0);
+ STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
}
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_inodelk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
}
-/* Release all inodelks from this transport */
-static int
-release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, void *trans)
+
+static void
+pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
+{
+ pl_inode_t *pl_inode = NULL;
+ char *path = NULL;
+ char *file = NULL;
+
+ pl_inode = lock->pl_inode;
+
+ inode_path (pl_inode->refkeeper, NULL, &path);
+
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (pl_inode->refkeeper->gfid);
+
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, lock->client, (uint64_t) lock->client_pid,
+ lkowner_utoa (&lock->owner));
+ GF_FREE (path);
+}
+
+
+/* Release all entrylks from this client */
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
{
pl_inode_lock_t *tmp = NULL;
pl_inode_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pl_inode = NULL;
- pl_inode_t * pinode = NULL;
-
- struct list_head granted;
struct list_head released;
- char *path = NULL;
-
- INIT_LIST_HEAD (&granted);
INIT_LIST_HEAD (&released);
- pinode = pl_inode_get (this, inode);
-
- pthread_mutex_lock (&pinode->mutex);
+ pthread_mutex_lock (&ctx->lock);
{
+ list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers,
+ client_list) {
+ list_del_init (&l->client_list);
+ list_add_tail (&l->client_list, &released);
- list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->transport != trans)
- continue;
+ pl_inodelk_log_cleanup (l);
- list_del_init (&l->blocked_locks);
+ pl_inode = l->pl_inode;
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
-
- list_add (&l->blocked_locks, &released);
- if (path) {
- GF_FREE (path);
- path = NULL;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __delete_inode_lock (l);
}
+ pthread_mutex_unlock (&pl_inode->mutex);
}
+ }
+ pthread_mutex_unlock (&ctx->lock);
- list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->transport != trans)
- continue;
+ list_for_each_entry_safe (l, tmp, &released, client_list) {
+ list_del_init (&l->client_list);
- __delete_inode_lock (l);
- __destroy_inode_lock (l);
+ if (l->frame)
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN,
+ NULL);
+ pl_inode = l->pl_inode;
- if (inode_path (inode, NULL, &path) < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "inode_path failed");
- goto unlock;
- }
+ dom = get_domain (pl_inode, l->volume);
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%"PRIu64"}",
- path, trans,
- (uint64_t) l->client_pid,
- l->owner);
- if (path) {
- GF_FREE (path);
- path = NULL;
- }
- }
- }
-unlock:
- if (path)
- GF_FREE (path);
-
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
- list_del_init (&l->blocked_locks);
+ grant_blocked_inode_locks (this, pl_inode, dom);
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN);
- GF_FREE (l);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __pl_inodelk_unref (l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
}
- grant_blocked_inode_locks (this, pinode, dom);
return 0;
}
static int
-pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom)
{
int ret = -EINVAL;
pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+
+ lock->pl_inode = pl_inode;
+ if (ctx)
+ pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pl_inode->mutex);
{
if (lock->fl_type != F_UNLCK) {
ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
- if (ret == 0)
+ if (ret == 0) {
+ lock->frame = NULL;
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->fl_start,
lock->fl_end);
-
- if (ret == -EAGAIN)
+ } else if (ret == -EAGAIN) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
+ if (can_block)
+ unref = _gf_false;
+ }
- goto out;
- }
-
+ if (ctx && (!ret || can_block))
+ list_add_tail (&lock->client_list,
+ &ctx->inodelk_lockers);
+ } else {
+ retlock = __inode_unlock_lock (this, lock, dom);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ list_del_init (&retlock->client_list);
+ __pl_inodelk_unref (retlock);
- retlock = __inode_unlock_lock (this, lock, dom);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
+ ret = 0;
}
- __destroy_inode_lock (retlock);
-
- ret = 0;
-
-
- }
out:
+ if (unref)
+ __pl_inodelk_unref (lock);
+ }
pthread_mutex_unlock (&pl_inode->mutex);
+ if (ctx)
+ pthread_mutex_unlock (&ctx->lock);
+
grant_blocked_inode_locks (this, pl_inode, dom);
+
return ret;
}
/* Create a new inode_lock_t */
pl_inode_lock_t *
-new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- uint64_t owner, const char *volume)
+new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id)
{
pl_inode_lock_t *lock = NULL;
@@ -534,33 +550,77 @@ new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
else
lock->fl_end = flock->l_start + flock->l_len - 1;
- lock->transport = transport;
+ lock->client = client;
lock->client_pid = client_pid;
- lock->owner = owner;
lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
INIT_LIST_HEAD (&lock->list);
INIT_LIST_HEAD (&lock->blocked_locks);
+ INIT_LIST_HEAD (&lock->client_list);
+ __pl_inodelk_ref (lock);
return lock;
}
+int32_t
+_pl_convert_volume (const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
+
+ mdata_vol = strrchr (volume, ':');
+ //if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0))
+ return 0;
+
+ ret = gf_asprintf (res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
+
+int32_t
+_pl_convert_volume_for_special_range (struct gf_flock *flock,
+ const char *volume, char **res)
+{
+ int32_t ret = 0;
+
+ if ((flock->l_start == LLONG_MAX -1) &&
+ (flock->l_len == 0)) {
+ ret = _pl_convert_volume (volume, res);
+ }
+
+ return ret;
+}
+
/* Common inodelk code called from pl_inodelk and pl_finodelk */
int
pl_common_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, inode_t *inode, int32_t cmd,
- struct gf_flock *flock, loc_t *loc, fd_t *fd)
+ struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- void * transport = NULL;
- pid_t client_pid = -1;
- uint64_t owner = -1;
- pl_inode_t * pinode = NULL;
- pl_inode_lock_t * reqlock = NULL;
- pl_dom_list_t * dom = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ pl_inode_t * pinode = NULL;
+ pl_inode_lock_t * reqlock = NULL;
+ pl_dom_list_t * dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (inode, unwind);
@@ -571,11 +631,22 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
+ op_errno = _pl_convert_volume_for_special_range (flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
+
pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ if (frame->root->client) {
+ ctx = pl_ctx_get (frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+ }
pinode = pl_inode_get (this, inode);
if (!pinode) {
@@ -584,22 +655,13 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
}
dom = get_domain (pinode, volume);
-
- if (owner == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, dom, inode, transport);
-
- op_ret = 0;
+ if (!dom) {
+ op_errno = ENOMEM;
goto unwind;
}
- reqlock = new_inode_lock (flock, transport, client_pid, owner, volume);
+ reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
+ frame, this, volume, conn_id);
if (!reqlock) {
op_ret = -1;
@@ -607,21 +669,17 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- reqlock->frame = frame;
- reqlock->this = this;
switch (cmd) {
case F_SETLKW:
can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
/* fall through */
case F_SETLK:
memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
- ret = pl_inode_setlk (this, pinode, reqlock,
- can_block, dom);
+ ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block,
+ dom);
if (ret < 0) {
if ((can_block) && (F_UNLCK != flock->l_type)) {
@@ -631,7 +689,6 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
}
gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
- __destroy_inode_lock (reqlock);
goto unwind;
}
break;
@@ -653,80 +710,80 @@ unwind:
pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
}
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
out:
+ GF_FREE (res);
+ GF_FREE (res1);
return 0;
}
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL);
+ pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock,
+ loc, NULL, xdata);
return 0;
}
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
{
-
- pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd);
+ pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock,
+ NULL, fd, xdata);
return 0;
}
+static inline int32_t
+__get_inodelk_dom_count (pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
-static int32_t
-__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode)
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ count++;
+ }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ count++;
+ }
+ return count;
+}
+
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname)
{
int32_t count = 0;
- pl_inode_lock_t *lock = NULL;
pl_dom_list_t *dom = NULL;
list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->inodelk_list, list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Active",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
- }
-
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ if (domname) {
+ if (strcmp (domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count (dom);
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" "
- "state = Blocked",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count (dom);
- count++;
}
-
}
+out:
return count;
}
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode)
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
@@ -742,7 +799,7 @@ get_inodelk_count (xlator_t *this, inode_t *inode)
pthread_mutex_lock (&pl_inode->mutex);
{
- count = __get_inodelk_count (this, pl_inode);
+ count = __get_inodelk_count (this, pl_inode, domname);
}
pthread_mutex_unlock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index 9d44f0eba..08aeb0a79 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
@@ -32,7 +22,6 @@ enum gf_locks_mem_types_ {
gf_locks_mt_truncate_ops,
gf_locks_mt_pl_rw_req_t,
gf_locks_mt_posix_locks_private_t,
- gf_locks_mt_pl_local_t,
gf_locks_mt_pl_fdctx_t,
gf_locks_mt_end
};
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 476b9a83c..8c2a6f867 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
@@ -29,8 +19,10 @@
#include "stack.h"
#include "call-stub.h"
#include "locks-mem-types.h"
+#include "client_t.h"
+
+#include "lkowner.h"
-#define POSIX_LOCKS "posix-locks"
struct __pl_fd;
struct __posix_lock {
@@ -41,7 +33,7 @@ struct __posix_lock {
off_t fl_end;
short blocked; /* waiting to acquire */
- struct gf_flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
unsigned long fd_num;
@@ -54,15 +46,16 @@ struct __posix_lock {
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner; /* lock owner from fuse */
};
typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
struct list_head list;
struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ int ref;
short fl_type;
off_t fl_start;
@@ -70,9 +63,9 @@ struct __pl_inode_lock {
const char *volume;
- struct gf_flock user_flock; /* the flock supplied by the user */
+ struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ struct __pl_inode *pl_inode;
call_frame_t *frame;
@@ -82,9 +75,13 @@ struct __pl_inode_lock {
/* These two together serve to uniquely identify each process
across nodes */
- void *transport; /* to identify client node */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
pid_t client_pid; /* pid of client process */
- uint64_t owner;
+
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -108,9 +105,11 @@ typedef struct __pl_dom_list_t pl_dom_list_t;
struct __entry_lock {
struct list_head domain_list; /* list_head back to pl_dom_list_t */
struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ int ref;
call_frame_t *frame;
xlator_t *this;
+ struct __pl_inode *pinode;
const char *volume;
@@ -120,9 +119,13 @@ struct __entry_lock {
struct timeval blkd_time; /*time at which lock was queued into blkd list*/
struct timeval granted_time; /*time at which lock was queued into active list*/
- void *trans;
- pid_t client_pid; /* pid of client process */
- uint64_t owner;
+ void *client;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -147,25 +150,55 @@ struct __pl_inode {
typedef struct __pl_inode pl_inode_t;
-struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
-};
-typedef struct __pl_fd pl_fd_t;
-
-
typedef struct {
gf_boolean_t mandatory; /* if mandatory locking is enabled */
gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
} posix_locks_private_t;
+
typedef struct {
gf_boolean_t entrylk_count_req;
gf_boolean_t inodelk_count_req;
+ gf_boolean_t inodelk_dom_count_req;
gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+
+ /* used by {f,}truncate */
+ loc_t loc;
+ fd_t *fd;
+ off_t offset;
+ dict_t *xdata;
+ enum {TRUNCATE, FTRUNCATE} op;
} pl_local_t;
+
typedef struct {
struct list_head locks_list;
} pl_fdctx_t;
+
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ inode_t *inode;
+ gf_lkowner_t owner;
+};
+
+typedef struct _locks_ctx {
+ pthread_mutex_t lock;
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
+} pl_ctx_t;
+
+
+pl_ctx_t *
+pl_ctx_get (client_t *client, xlator_t *xlator);
+
+int
+pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
+int
+pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 59e2199fb..2db327687 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -37,6 +27,9 @@
#include "locks.h"
#include "common.h"
#include "statedump.h"
+#include "clear.h"
+#include "defaults.h"
+#include "syncop.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -47,13 +40,9 @@
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
-
-struct _truncate_ops {
- loc_t loc;
- fd_t *fd;
- off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
-};
+static int format_brickname(char *);
+int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *);
+static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
static pl_fdctx_t *
pl_new_fdctx ()
@@ -62,7 +51,7 @@ pl_new_fdctx ()
fdctx = GF_CALLOC (1, sizeof (*fdctx),
gf_locks_mt_pl_fdctx_t);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fdctx, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out);
INIT_LIST_HEAD (&fdctx->locks_list);
@@ -77,7 +66,7 @@ pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
uint64_t tmp = 0;
pl_fdctx_t *fdctx = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, this, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", this, out);
GF_VALIDATE_OR_GOTO (this->name, fd, out);
LOCK (&fd->lock);
@@ -108,25 +97,30 @@ out:
int
pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
local = frame->local;
if (local->op == TRUNCATE)
loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf);
+ prebuf, postbuf, xdata);
return 0;
}
static int
truncate_allowed (pl_inode_t *pl_inode,
- void *transport, pid_t client_pid,
- uint64_t owner, off_t offset)
+ client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, off_t offset)
{
posix_lock_t *l = NULL;
posix_lock_t region = {.list = {0, }, };
@@ -134,9 +128,9 @@ truncate_allowed (pl_inode_t *pl_inode,
region.fl_start = offset;
region.fl_end = LLONG_MAX;
- region.transport = transport;
+ region.client = client;
region.client_pid = client_pid;
- region.owner = owner;
+ region.owner = *owner;
pthread_mutex_lock (&pl_inode->mutex);
{
@@ -145,7 +139,7 @@ truncate_allowed (pl_inode_t *pl_inode,
&& locks_overlap (&region, l)
&& !same_owner (&region, l)) {
ret = 0;
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Truncate "
+ gf_log ("posix-locks", GF_LOG_TRACE, "Truncate "
"allowed");
break;
}
@@ -159,10 +153,11 @@ truncate_allowed (pl_inode_t *pl_inode,
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
@@ -191,8 +186,8 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->mandatory
&& pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, frame->root->lk_owner,
+ && !truncate_allowed (pl_inode, frame->root->client,
+ frame->root->pid, &frame->root->lk_owner,
local->offset)) {
op_ret = -1;
op_errno = EAGAIN;
@@ -203,12 +198,12 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
case TRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
+ &local->loc, local->offset, local->xdata);
break;
case FTRUNCATE:
STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
+ local->fd, local->offset, local->xdata);
break;
}
@@ -219,37 +214,42 @@ unwind:
"error: %s", op_ret, strerror (op_errno));
if (local->op == TRUNCATE)
loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
return 0;
}
int
pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
local->op = TRUNCATE;
local->offset = offset;
loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc);
+ FIRST_CHILD (this)->fops->stat, loc, NULL);
return 0;
unwind:
gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
"error: %s", loc->path, -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -257,32 +257,54 @@ unwind:
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (struct _truncate_ops),
- gf_locks_mt_truncate_ops);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, unwind);
local->op = FTRUNCATE;
local->offset = offset;
- local->fd = fd;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
frame->local = local;
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
unwind:
gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
"error: %s", -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
+int
+pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return found;
+}
+
static void
delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
{
@@ -312,7 +334,8 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
list_for_each_entry_safe (l, tmp, &blocked_list, list) {
list_del_init(&l->list);
- STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock);
+ STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
+ NULL);
__destroy_lock (l);
}
@@ -324,7 +347,7 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
static void
__delete_locks_of_owner (pl_inode_t *pl_inode,
- void *transport, uint64_t owner)
+ client_t *client, gf_lkowner_t *owner)
{
posix_lock_t *tmp = NULL;
posix_lock_t *l = NULL;
@@ -332,14 +355,16 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
/* TODO: what if it is a blocked lock with pending l->frame */
list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->transport == transport)
- && (l->owner == owner)) {
+ if (l->blocked)
+ continue;
+ if ((l->client == client) &&
+ is_same_lkowner (&l->owner, owner)) {
gf_log ("posix-locks", GF_LOG_TRACE,
" Flushing lock"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
l->fl_type == F_UNLCK ? "Unlock" : "Lock",
l->client_pid,
- l->owner,
+ lkowner_utoa (&l->owner),
l->user_flock.l_start,
l->user_flock.l_len,
l->blocked == 1 ? "Blocked" : "Active");
@@ -352,13 +377,539 @@ __delete_locks_of_owner (pl_inode_t *pl_inode,
return;
}
+
+int32_t
+pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+
+}
+
+int32_t
+pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int op_ret = -1;
+ int32_t bcount = 0;
+ int32_t gcount = 0;
+ char key[PATH_MAX] = {0, };
+ char *lk_summary = NULL;
+ pl_inode_t *pl_inode = NULL;
+ dict_t *dict = NULL;
+ clrlk_args args = {0,};
+ char *brickname = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ if (clrlk_parse_args (name, &args)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get (this, loc->inode);
+ if (!pl_inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ switch (args.type) {
+ case CLRLK_INODE:
+ case CLRLK_ENTRY:
+ op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
+ &args, &bcount,
+ &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_POSIX:
+ op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
+ &bcount, &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_TYPE_MAX:
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't get brickname");
+ } else {
+ op_ret = format_brickname(brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't format brickname");
+ GF_FREE(brickname);
+ brickname = NULL;
+ }
+ }
+
+ if (!gcount && !bcount) {
+ if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL)? this->name : brickname,
+ (args.type == CLRLK_INODE)? "inode":
+ (args.type == CLRLK_ENTRY)? "entry":
+ (args.type == CLRLK_POSIX)? "posix": " ",
+ bcount, gcount) == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ strncpy (key, name, strlen (name));
+ if (dict_set_dynstr (dict, key, lk_summary)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ GF_FREE(brickname);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ GF_FREE (args.opts);
+ if (op_ret && lk_summary)
+ GF_FREE (lk_summary);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+
+usual:
+ STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+static int
+format_brickname(char *brickname)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *volume = NULL;
+ char *saveptr = NULL;
+
+ if (!brickname)
+ goto out;
+
+ strtok_r(brickname, ":", &saveptr);
+ hostname = gf_strdup(strtok_r(NULL, ":", &saveptr));
+ if (hostname == NULL)
+ goto out;
+ volume = gf_strdup(strtok_r(NULL, ".", &saveptr));
+ if (volume == NULL)
+ goto out;
+
+ sprintf(brickname, "%s:%s", hostname, volume);
+
+ ret = 0;
+out:
+ GF_FREE(hostname);
+ GF_FREE(volume);
+ return ret;
+}
+
+static int
+fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno,
+ char **brickname)
+{
+ int ret = -1;
+ loc_t loc = {0, };
+ dict_t *dict = NULL;
+
+ if (!brickname)
+ goto out;
+
+ if (!op_errno)
+ goto out;
+
+ uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref (inode);
+
+ ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict,
+ GF_XATTR_PATHINFO_KEY);
+ if (ret < 0) {
+ *op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname);
+ if (ret)
+ goto out;
+
+ *brickname = gf_strdup(*brickname);
+ if (*brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+
+int
+pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ int ret = -1;
+ posix_locks_private_t *priv = NULL;
+ char *brickname = NULL;
+ char *end = NULL;
+ char *tmp = NULL;
+
+ priv = this->private;
+
+ ret = fetch_pathinfo (this, inode, op_errno, &brickname);
+ if (ret)
+ goto out;
+
+ end = strrchr (brickname, ':');
+ if (!end) {
+ GF_FREE(brickname);
+ ret = -1;
+ goto out;
+ }
+
+ tmp = brickname;
+ brickname = gf_strndup (brickname, (end - brickname));
+ if (brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->brickname = brickname;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+ return ret;
+}
+
+char *
+pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char *key = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (priv->brickname == NULL) {
+ ret = pl_lockinfo_get_brickname (this, inode, op_errno);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot get brickname");
+ goto out;
+ }
+ }
+
+ key = priv->brickname;
+out:
+ return key;
+}
+
+int32_t
+pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ char *key = NULL, *buf = NULL;
+ int32_t op_ret = 0;
+ unsigned long fdnum = 0, len = 0;
+ dict_t *tmp = NULL;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ *op_errno = EBADFD;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (!pl_locks_by_fd (pl_inode, fd)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ fdnum = fd_to_fdnum (fd);
+
+ key = pl_lockinfo_key (this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ tmp = dict_new ();
+ if (tmp == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_set_uint64 (tmp, key, fdnum);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ len = dict_serialized_length (tmp);
+ if (len < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialized_length failed (%s) while handling "
+ "lockinfo for fd (ptr:%p inode-gfid:%s)",
+ strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (buf == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_serialize (tmp, buf);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s) while handling lockinfo "
+ "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno),
+ fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ buf = NULL;
+out:
+ if (tmp != NULL) {
+ dict_unref (tmp);
+ }
+
+ if (buf != NULL) {
+ GF_FREE (buf);
+ }
+
+ return op_ret;
+}
+
+
+int32_t
+pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new ();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict,
+ &op_errno);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)", fd, uuid_utoa (fd->inode->gfid),
+ strerror (op_errno));
+ }
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+ int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t newfd_num = 0;
+ posix_lock_t *l = NULL;
+ int32_t op_ret = 0;
+
+ newfd_num = fd_to_fdnum (newfd);
+
+ pl_inode = pl_inode_get (frame->this, newfd->inode);
+ if (pl_inode == NULL) {
+ op_ret = -1;
+ *op_errno = EBADFD;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->fd_num == oldfd_num) {
+ l->fd_num = newfd_num;
+ l->client = frame->root->client;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ op_ret = 0;
+out:
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+ int len, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ dict_t *lockinfo = NULL;
+ uint64_t oldfd_num = 0;
+ char *key = NULL;
+
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ key = pl_lockinfo_key (frame->this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num);
+
+ if (oldfd_num == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno);
+ if (op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "migration of locks from oldfd (ptr:%p) to newfd "
+ "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd,
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+out:
+ dict_unref (lockinfo);
+
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ void *lockinfo_buf = NULL;
+ int len = 0;
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
+ if (lockinfo_buf == NULL) {
+ goto usual;
+ }
+
+ op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len,
+ &op_errno);
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+usual:
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
int32_t
pl_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -377,28 +928,28 @@ unwind:
frame,
op_ret,
op_errno,
- fd);
+ fd, xdata);
return 0;
}
int32_t
pl_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
STACK_WIND (frame,
pl_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
+ loc, fd, xdata);
return 0;
}
int
pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -406,24 +957,21 @@ pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t owner = -1;
-
- owner = frame->root->lk_owner;
+ pl_inode_t *pl_inode = NULL;
pl_inode = pl_inode_get (this, fd->inode);
if (!pl_inode) {
gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD);
+ STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
return 0;
}
pl_trace_flush (this, frame, fd);
- if (owner == 0) {
+ if (frame->root->lk_owner.len == 0) {
/* Handle special case when protocol/server sets lk-owner to zero.
* This usually happens due to a client disconnection. Hence, free
* all locks opened with this fd.
@@ -436,8 +984,8 @@ pl_flush (call_frame_t *frame, xlator_t *this,
}
pthread_mutex_lock (&pl_inode->mutex);
{
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- owner);
+ __delete_locks_of_owner (pl_inode, frame->root->client,
+ &frame->root->lk_owner);
}
pthread_mutex_unlock (&pl_inode->mutex);
@@ -447,14 +995,14 @@ pl_flush (call_frame_t *frame, xlator_t *this,
wind:
STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd);
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
return 0;
}
int
pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -469,7 +1017,7 @@ pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
@@ -477,12 +1025,11 @@ unwind:
int
pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
- /* why isn't O_TRUNC being handled ? */
STACK_WIND (frame, pl_open_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags & ~O_TRUNC, fd, wbflags);
+ loc, flags, fd, xdata);
return 0;
}
@@ -492,7 +1039,7 @@ int
pl_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
pl_fdctx_t *fdctx = NULL;
@@ -508,7 +1055,7 @@ pl_create_cbk (call_frame_t *frame, void *cookie,
unwind:
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -516,11 +1063,12 @@ unwind:
int
pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
STACK_WIND (frame, pl_create_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, fd, params);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
@@ -529,10 +1077,10 @@ int
pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *stbuf,
- struct iobref *iobref)
+ struct iobref *iobref, dict_t *xdata)
{
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref);
+ vector, count, stbuf, iobref, xdata);
return 0;
}
@@ -540,9 +1088,10 @@ pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -600,12 +1149,12 @@ __rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -613,7 +1162,7 @@ pl_readv_cont (call_frame_t *frame, xlator_t *this,
int
pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
@@ -630,7 +1179,7 @@ pl_readv (call_frame_t *frame, xlator_t *this,
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
@@ -660,7 +1209,8 @@ pl_readv (call_frame_t *frame, xlator_t *this,
}
rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset);
+ fd, size, offset, flags,
+ xdata);
if (!rw->stub) {
op_errno = ENOMEM;
op_ret = -1;
@@ -680,12 +1230,12 @@ pl_readv (call_frame_t *frame, xlator_t *this,
if (wind_needed) {
STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
}
if (op_ret == -1)
STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -694,11 +1244,11 @@ pl_readv (call_frame_t *frame, xlator_t *this,
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -707,7 +1257,7 @@ pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
posix_locks_private_t *priv = NULL;
pl_inode_t *pl_inode = NULL;
@@ -717,14 +1267,13 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
int op_errno = 0;
char wind_needed = 1;
-
priv = this->private;
pl_inode = pl_inode_get (this, fd->inode);
if (priv->mandatory && pl_inode->mandatory) {
region.fl_start = offset;
region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
+ region.client = frame->root->client;
region.fd_num = fd_to_fdnum(fd);
region.client_pid = frame->root->pid;
region.owner = frame->root->lk_owner;
@@ -755,7 +1304,7 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
rw->stub = fop_writev_stub (frame, pl_writev_cont,
fd, vector, count, offset,
- iobref);
+ flags, iobref, xdata);
if (!rw->stub) {
op_errno = ENOMEM;
op_ret = -1;
@@ -775,10 +1324,11 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (wind_needed)
STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
if (op_ret == -1)
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -804,8 +1354,8 @@ lock_dup (posix_lock_t *lock)
{
posix_lock_t *new_lock = NULL;
- new_lock = new_posix_lock (&lock->user_flock, lock->transport,
- lock->client_pid, lock->owner,
+ new_lock = new_posix_lock (&lock->user_flock, lock->client,
+ lock->client_pid, &lock->owner,
(fd_t *)lock->fd_num);
return new_lock;
}
@@ -962,22 +1512,15 @@ unlock:
int
pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock)
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- void *transport = NULL;
- pid_t client_pid = 0;
- uint64_t owner = 0;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- owner = frame->root->lk_owner;
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ int ret = 0;
if ((flock->l_start < 0) || (flock->l_len < 0)) {
op_ret = -1;
@@ -992,8 +1535,8 @@ pl_lk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
- reqlock = new_posix_lock (flock, transport, client_pid,
- owner, fd);
+ reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd);
if (!reqlock) {
op_ret = -1;
@@ -1110,13 +1653,22 @@ pl_lk (call_frame_t *frame, xlator_t *this,
op_ret = -1;
op_errno = EAGAIN;
__destroy_lock (reqlock);
+
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd (pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
}
}
unwind:
pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock);
+
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -1193,7 +1745,7 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
__delete_inode_lock (ino_l);
- __destroy_inode_lock (ino_l);
+ __pl_inodelk_unref (ino_l);
}
list_splice_init (&dom->blocked_inodelks, &inodelks_released);
@@ -1207,8 +1759,8 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
list_del_init (&entry_l->domain_list);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
GF_FREE (entry_l);
}
@@ -1227,21 +1779,22 @@ pl_forget (xlator_t *this,
list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
- STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, &ext_l->user_flock);
+ STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
+ &ext_l->user_flock, NULL);
__destroy_lock (ext_l);
}
list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
- STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0);
- __destroy_inode_lock (ino_l);
+ STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref (ino_l);
}
list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
- STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
+ STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
GF_FREE (entry_l);
}
@@ -1317,7 +1870,7 @@ out:
return ret;
}
-static int32_t
+int32_t
__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
{
posix_lock_t *lock = NULL;
@@ -1325,16 +1878,6 @@ __get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry (lock, &pl_inode->ext_list, list) {
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" state: %s",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->owner,
- lock->user_flock.l_start,
- lock->user_flock.l_len,
- lock->blocked == 1 ? "Blocked" : "Active");
-
count++;
}
@@ -1367,6 +1910,24 @@ out:
}
void
+pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
+ char *basename, dict_t *dict)
+{
+ uint32_t entrylk = 0;
+ int ret = -1;
+
+ if (!parent || !basename || !strlen (basename))
+ goto out;
+ entrylk = check_entrylk_on_basename (this, parent, basename);
+out:
+ ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+ }
+}
+
+void
pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
dict_t *dict)
{
@@ -1383,19 +1944,34 @@ pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
}
void
-pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t per_dom)
{
int32_t count = 0;
int ret = -1;
+ char *domname = NULL;
+
+
+ if (per_dom){
+ ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT,
+ &domname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "value for key %s",GLUSTERFS_INODELK_DOM_COUNT);
+ goto out;
+ }
+ }
+
+ count = get_inodelk_count (this, inode, domname);
- count = get_inodelk_count (this, inode);
ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_INODELK_COUNT);
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for "
+ "key %s", GLUSTERFS_INODELK_COUNT);
}
+out:
+ return;
}
void
@@ -1422,50 +1998,57 @@ pl_lookup_cbk (call_frame_t *frame,
int32_t op_errno,
inode_t *inode,
struct iatt *buf,
- dict_t *dict,
+ dict_t *xdata,
struct iatt *postparent)
{
pl_local_t *local = NULL;
GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
- if (op_ret) {
+ if (op_ret)
goto out;
- }
local = frame->local;
+ if (local->parent_entrylk_req)
+ pl_parent_entrylk_xattr_fill (this, local->loc.parent,
+ (char*)local->loc.name, xdata);
if (local->entrylk_count_req)
- pl_entrylk_xattr_fill (this, inode, dict);
+ pl_entrylk_xattr_fill (this, inode, xdata);
if (local->inodelk_count_req)
- pl_inodelk_xattr_fill (this, inode, dict);
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_true);
if (local->posixlk_count_req)
- pl_posixlk_xattr_fill (this, inode, dict);
+ pl_posixlk_xattr_fill (this, inode, xdata);
+out:
+ local = frame->local;
frame->local = NULL;
- if (local != NULL)
- GF_FREE (local);
+ if (local != NULL) {
+ loc_wipe (&local->loc);
+ mem_put (local);
+ }
-out:
STACK_UNWIND_STRICT (
lookup,
frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict,
- postparent);
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
return 0;
}
int32_t
pl_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata)
{
pl_local_t *local = NULL;
int ret = -1;
@@ -1474,40 +2057,114 @@ pl_lookup (call_frame_t *frame,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- local = GF_CALLOC (1, sizeof (*local), gf_locks_mt_pl_local_t);
+ local = mem_get0 (this->local_pool);
GF_VALIDATE_OR_GOTO (this->name, local, out);
- if (xattr_req) {
- if (dict_get (xattr_req, GLUSTERFS_ENTRYLK_COUNT))
+ if (xdata) {
+ if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT))
local->entrylk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_INODELK_COUNT))
+ if (dict_get (xdata, GLUSTERFS_INODELK_COUNT))
local->inodelk_count_req = 1;
- if (dict_get (xattr_req, GLUSTERFS_POSIXLK_COUNT))
+ if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT))
local->posixlk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK))
+ local->parent_entrylk_req = 1;
}
frame->local = local;
+ loc_copy (&local->loc, loc);
STACK_WIND (frame,
pl_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
+ loc, xdata);
ret = 0;
out:
if (ret == -1)
- STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+int
+pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+
+ local = frame->local;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, entry->inode, entry->dict);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, entry->inode, entry->dict);
+ }
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ if (local)
+ mem_put (local);
return 0;
}
+int
+pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ pl_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (dict) {
+ if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ }
+
+ frame->local = local;
+
+ STACK_WIND (frame, pl_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+
void
pl_dump_lock (char *str, int size, struct gf_flock *flock,
- uint64_t owner, void *trans, time_t *granted_time,
- time_t *blkd_time, gf_boolean_t active)
+ gf_lkowner_t *owner, void *trans, char *conn_id,
+ time_t *granted_time, time_t *blkd_time, gf_boolean_t active)
{
- char *type_str = NULL;
+ char *type_str = NULL;
+ char granted[32] = {0,};
+ char blocked[32] = {0,};
switch (flock->l_type) {
case F_RDLCK:
@@ -1526,31 +2183,32 @@ pl_dump_lock (char *str, int size, struct gf_flock *flock,
if (active) {
if (blkd_time && *blkd_time == 0) {
- snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu, transport=%p, "
- "granted at %s",
- type_str, (unsigned long long) flock->l_start,
+ snprintf (str, size, RANGE_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner,
- trans, ctime (granted_time));
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (granted_time, granted));
} else {
- snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu, transport=%p, "
- "blocked at %s, granted at %s",
- type_str, (unsigned long long) flock->l_start,
+ snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner,
- trans, ctime (blkd_time), ctime (granted_time));
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked),
+ ctime_r (granted_time, granted));
}
}
else {
- snprintf (str, size, "type=%s, start=%llu, len=%llu, pid=%llu, lk-owner=%llu, transport=%p, "
- "blocked at %s",
- type_str, (unsigned long long) flock->l_start,
+ snprintf (str, size, RANGE_BLKD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
(unsigned long long) flock->l_len,
(unsigned long long) flock->l_pid,
- (unsigned long long) owner,
- trans, ctime (blkd_time));
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked));
}
}
@@ -1560,8 +2218,10 @@ __dump_entrylks (pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char blocked[32] = {0,};
+ char granted[32] = {0,};
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
char tmp[256];
@@ -1580,22 +2240,22 @@ __dump_entrylks (pl_inode_t *pl_inode)
"xlator.feature.locks.lock-dump.domain.entrylk",
"entrylk[%d](ACTIVE)", count );
if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) {
- snprintf (tmp, 256," %s on %s pid = %llu, owner=%llu, transport=%p,"
- " granted at %s",
+ snprintf (tmp, 256, ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- (unsigned long long) lock->owner, lock->trans,
- ctime (&lock->granted_time.tv_sec));
+ lkowner_utoa (&lock->owner), lock->client,
+ lock->connection_id,
+ ctime_r (&lock->granted_time.tv_sec, granted));
} else {
- snprintf (tmp, 256," %s on %s pid = %llu, owner=%llu, transport=%p,"
- " blocked at %s, granted at %s",
+ snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- (unsigned long long) lock->owner, lock->trans,
- ctime (&lock->blkd_time.tv_sec),
- ctime (&lock->granted_time.tv_sec));
+ lkowner_utoa (&lock->owner), lock->client,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked),
+ ctime_r (&lock->granted_time.tv_sec, granted));
}
gf_proc_dump_write(key, tmp);
@@ -1608,13 +2268,13 @@ __dump_entrylks (pl_inode_t *pl_inode)
gf_proc_dump_build_key(key,
"xlator.feature.locks.lock-dump.domain.entrylk",
"entrylk[%d](BLOCKED)", count );
- snprintf (tmp, 256," %s on %s pid = %llu, owner=%llu, transport=%p,"
- " blocked at %s",
+ snprintf (tmp, 256, ENTRY_BLKD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
"ENTRYLK_WRLCK", lock->basename,
(unsigned long long) lock->client_pid,
- (unsigned long long) lock->owner, lock->trans,
- ctime (&lock->blkd_time.tv_sec));
+ lkowner_utoa (&lock->owner), lock->client,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked));
gf_proc_dump_write(key, tmp);
@@ -1663,7 +2323,8 @@ __dump_inodelks (pl_inode_t *pl_inode)
SET_FLOCK_PID (&lock->user_flock, lock);
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport,
+ &lock->owner,
+ lock->client, lock->connection_id,
&lock->granted_time.tv_sec,
&lock->blkd_time.tv_sec,
_gf_true);
@@ -1679,7 +2340,8 @@ __dump_inodelks (pl_inode_t *pl_inode)
"inodelk[%d](BLOCKED)",count );
SET_FLOCK_PID (&lock->user_flock, lock);
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport,
+ &lock->owner,
+ lock->client, lock->connection_id,
0, &lock->blkd_time.tv_sec,
_gf_false);
gf_proc_dump_write(key, tmp);
@@ -1720,16 +2382,13 @@ __dump_posixlks (pl_inode_t *pl_inode)
count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock (tmp, 256, &lock->user_flock,
- lock->owner, lock->transport,
+ &lock->owner, lock->client, NULL,
&lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
(lock->blocked)? _gf_false: _gf_true);
gf_proc_dump_write(key, tmp);
count++;
}
-
-
-
}
void
@@ -1750,46 +2409,81 @@ pl_dump_inode_priv (xlator_t *this, inode_t *inode)
int ret = -1;
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
int count = 0;
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
- if (ret != 0)
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret)
goto out;
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
-
if (!pl_inode) {
ret = -1;
goto out;
}
gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
+
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path (inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write ("path", "%s", pathname);
gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
- count = get_entrylk_count (this, inode);
- if (count) {
- gf_proc_dump_write("entrylk-count", "%d", count);
- dump_entrylks(pl_inode);
- }
+ ret = pthread_mutex_trylock (&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks (pl_inode);
+ }
- count = get_inodelk_count (this, inode);
- if (count) {
- gf_proc_dump_write("inodlk-count", "%d", count);
- dump_inodelks(pl_inode);
- }
+ count = __get_inodelk_count (this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks (pl_inode);
+ }
- count = get_posixlk_count (this, inode);
- if (count) {
- gf_proc_dump_write("posixlk-count", "%d", count);
- dump_posixlks(pl_inode);
+ count = __get_posixlk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks (pl_inode);
+ }
}
+ pthread_mutex_unlock (&pl_inode->mutex);
out:
+ GF_FREE (pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section ("xlator.features.locks.%s."
+ "inode", this->name);
+ gf_proc_dump_write ("Unable to print lock state", "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa (inode->gfid));
+ }
return ret;
}
@@ -1812,6 +2506,79 @@ mem_acct_init (xlator_t *this)
return ret;
}
+
+pl_ctx_t*
+pl_ctx_get (client_t *client, xlator_t *xlator)
+{
+ void *tmp = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ client_ctx_get (client, xlator, &tmp);
+
+ ctx = tmp;
+
+ if (ctx != NULL)
+ goto out;
+
+ ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t);
+
+ if (ctx == NULL)
+ goto out;
+
+ pthread_mutex_init (&ctx->lock, NULL);
+ INIT_LIST_HEAD (&ctx->inodelk_lockers);
+ INIT_LIST_HEAD (&ctx->entrylk_lockers);
+
+ if (client_ctx_set (client, xlator, ctx) != 0) {
+ pthread_mutex_destroy (&ctx->lock);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+out:
+ return ctx;
+}
+
+
+static int
+pl_client_disconnect_cbk (xlator_t *this, client_t *client)
+{
+ pl_ctx_t *pl_ctx = NULL;
+
+ pl_ctx = pl_ctx_get (client, this);
+
+ pl_inodelk_client_cleanup (this, pl_ctx);
+
+ pl_entrylk_client_cleanup (this, pl_ctx);
+
+ return 0;
+}
+
+
+static int
+pl_client_destroy_cbk (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ pl_ctx_t *pl_ctx = NULL;
+
+ pl_client_disconnect_cbk (this, client);
+
+ client_ctx_del (client, this, &tmp);
+
+ if (tmp == NULL)
+ return 0;
+
+ pl_ctx = tmp;
+
+ GF_ASSERT (list_empty(&pl_ctx->inodelk_lockers));
+ GF_ASSERT (list_empty(&pl_ctx->entrylk_lockers));
+
+ pthread_mutex_destroy (&pl_ctx->lock);
+ GF_FREE (pl_ctx);
+
+ return 0;
+}
+
+
int
init (xlator_t *this)
{
@@ -1840,7 +2607,7 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_CRITICAL,
"'locks' translator is not loaded over a storage "
"translator");
- goto out;;
+ goto out;
}
priv = GF_CALLOC (1, sizeof (*priv),
@@ -1861,13 +2628,20 @@ init (xlator_t *this)
}
}
+ this->local_pool = mem_pool_new (pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
this->private = priv;
ret = 0;
out:
if (ret) {
- if (priv)
- GF_FREE (priv);
+ GF_FREE (priv);
}
return ret;
}
@@ -1882,6 +2656,7 @@ fini (xlator_t *this)
if (!priv)
return 0;
this->private = NULL;
+ GF_FREE (priv->brickname);
GF_FREE (priv);
return 0;
@@ -1890,21 +2665,23 @@ fini (xlator_t *this)
int
pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock);
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock);
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
pl_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int
pl_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
struct xlator_fops fops = {
.lookup = pl_lookup,
@@ -1921,6 +2698,10 @@ struct xlator_fops fops = {
.fentrylk = pl_fentrylk,
.flush = pl_flush,
.opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .fsetxattr = pl_fsetxattr,
};
struct xlator_dumpops dumpops = {
@@ -1928,9 +2709,11 @@ struct xlator_dumpops dumpops = {
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
- .release = pl_release,
- .releasedir = pl_releasedir,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = pl_client_destroy_cbk,
+ .client_disconnect = pl_client_disconnect_cbk,
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 4aac1803d..11abd26d8 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -81,10 +71,10 @@ out:
return ret_lock;
}
-static int
+static inline int
__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->owner == l2->owner));
+ return (is_same_lkowner (&l1->owner, &l2->owner));
}
@@ -187,10 +177,10 @@ __lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%"PRIu64" %"PRId64" - %"PRId64" => Blocked",
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
@@ -292,14 +282,15 @@ grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
list_for_each_entry_safe (lock, tmp, &granted, list) {
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => Granted",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
- STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock,
+ NULL);
}
}
@@ -376,7 +367,9 @@ grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out (this, lock->frame, fd, NULL, cmd,
&lock->user_flock, -1, EAGAIN, NULL);
pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, lock->frame, -1, EAGAIN, &lock->user_flock);
+ STACK_UNWIND_STRICT (lk, lock->frame, -1,
+ EAGAIN, &lock->user_flock,
+ NULL);
__destroy_lock (lock);
}
}
@@ -429,18 +422,18 @@ pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
ret = __lock_reservelk (this, pl_inode, lock, can_block);
if (ret < 0)
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => NOK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->user_flock.l_start,
lock->user_flock.l_len);
else
gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%"PRIu64") %"PRId64" - %"PRId64" => OK",
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid,
- lock->owner,
+ lkowner_utoa (&lock->owner),
lock->fl_start,
lock->fl_end);
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index e95612ad4..d2cca32de 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am
index 915c13e30..f8567edce 100644
--- a/xlators/features/mac-compat/src/Makefile.am
+++ b/xlators/features/mac-compat/src/Makefile.am
@@ -1,13 +1,14 @@
xlator_LTLIBRARIES = mac-compat.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-mac_compat_la_LDFLAGS = -module -avoidversion
+mac_compat_la_LDFLAGS = -module -avoid-version
mac_compat_la_SOURCES = mac-compat.c
mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c
index 188b593e4..7cb550ad5 100644
--- a/xlators/features/mac-compat/src/mac-compat.c
+++ b/xlators/features/mac-compat/src/mac-compat.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -57,7 +47,8 @@ static int32_t apple_xattr_len[] = {
int32_t
maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
int i = 0;
@@ -80,7 +71,7 @@ maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
@@ -88,7 +79,7 @@ maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -109,14 +100,14 @@ maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
STACK_WIND (frame, maccomp_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc, name);
+ loc, name, xdata);
return 0;
}
int32_t
maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -137,21 +128,21 @@ maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
STACK_WIND (frame, maccomp_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
- fd, name);
+ fd, name, xdata);
return 0;
}
int32_t
maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
intptr_t ax = (intptr_t)this->private;
if (op_ret == -1 && ax != GF_XATTR_NONE)
op_ret = op_errno = 0;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -159,7 +150,7 @@ maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -177,14 +168,14 @@ maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags);
+ loc, dict, flags, xdata);
return 0;
}
int32_t
maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
intptr_t ax = GF_XATTR_NONE;
int i = 0;
@@ -202,7 +193,7 @@ maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
STACK_WIND (frame, maccomp_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags);
+ fd, dict, flags, xdata);
return 0;
}
@@ -239,8 +230,7 @@ struct xlator_fops fops = {
.fsetxattr = maccomp_fsetxattr,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {NULL} },
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am
index a6ba2de16..a985f42a8 100644
--- a/xlators/features/marker/Makefile.am
+++ b/xlators/features/marker/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = src @SYNCDAEMON_SUBDIR@
+SUBDIRS = src
CLEANFILES =
diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am
index 501586a76..a7c676472 100644
--- a/xlators/features/marker/src/Makefile.am
+++ b/xlators/features/marker/src/Makefile.am
@@ -1,15 +1,17 @@
xlator_LTLIBRARIES = marker.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-marker_la_LDFLAGS = -module -avoidversion
+marker_la_LDFLAGS = -module -avoid-version
marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c
marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
index a413781bc..84a718add 100644
--- a/xlators/features/marker/src/marker-common.c
+++ b/xlators/features/marker/src/marker-common.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -69,18 +60,10 @@ unlock: UNLOCK (&inode->lock);
return ret;
}
-void
+int
marker_filter_quota_xattr (dict_t *dict, char *key,
data_t *value, void *data)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", dict, out);
- GF_VALIDATE_OR_GOTO ("marker", key, out);
-
- ret = fnmatch ("trusted.glusterfs.quota*", key, 0);
- if (ret == 0)
- dict_del (dict, key);
-out:
- return;
+ dict_del (dict, key);
+ return 0;
}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
index 0a7ee2619..23dd846cb 100644
--- a/xlators/features/marker/src/marker-common.h
+++ b/xlators/features/marker/src/marker-common.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_COMMON_H
#define _MARKER_COMMON_H
@@ -31,6 +22,6 @@
int32_t
marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **);
-void
+int
marker_filter_quota_xattr (dict_t *, char *, data_t *, void *);
#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
index f2723dc26..1f74d5048 100644
--- a/xlators/features/marker/src/marker-mem-types.h
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -1,36 +1,24 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MARKER_MEM_TYPES_H__
#define __MARKER_MEM_TYPES_H__
#include "mem-types.h"
enum gf_marker_mem_types_ {
- gf_marker_mt_marker_local_t = gf_common_mt_end + 1,
- gf_marker_mt_marker_conf_t,
+ gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
gf_marker_mt_loc_t,
gf_marker_mt_volume_mark,
gf_marker_mt_int64_t,
gf_marker_mt_quota_inode_ctx_t,
gf_marker_mt_marker_inode_ctx_t,
- gf_marker_mt_quota_local_t,
gf_marker_mt_inode_contribution_t,
gf_marker_mt_end
};
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
index 03a8d78ba..ec0d83316 100644
--- a/xlators/features/marker/src/marker-quota-helper.c
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -163,13 +154,28 @@ out:
inode_contribution_t *
-__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx,
+ loc_t *loc)
{
- int32_t ret = 0;
+ int32_t ret = 0;
inode_contribution_t *contribution = NULL;
- list_for_each_entry (contribution, &ctx->contribution_head, contri_list) {
- if (uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
+ if (!loc->parent) {
+ if (!uuid_is_null (loc->pargfid))
+ loc->parent = inode_find (loc->inode->table,
+ loc->pargfid);
+
+ if (!loc->parent)
+ loc->parent = inode_parent (loc->inode, loc->pargfid,
+ loc->name);
+ if (!loc->parent)
+ goto out;
+ }
+
+ list_for_each_entry (contribution, &ctx->contribution_head,
+ contri_list) {
+ if (loc->parent &&
+ uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
goto out;
}
}
@@ -193,14 +199,16 @@ out:
inode_contribution_t *
-mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx,
+ loc_t *loc)
{
inode_contribution_t *contribution = NULL;
if ((ctx == NULL) || (loc == NULL))
return NULL;
- if (strcmp (loc->path, "/") == 0)
+ if (((loc->path) && (strcmp (loc->path, "/") == 0))
+ || (!loc->path && uuid_is_null (loc->pargfid)))
return NULL;
LOCK (&ctx->lock);
@@ -224,10 +232,15 @@ mq_dict_set_contribution (xlator_t *this, dict_t *dict,
GF_VALIDATE_OR_GOTO ("marker", dict, out);
GF_VALIDATE_OR_GOTO ("marker", loc, out);
- GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
- if (ret < 0) {
- ret = -1;
- goto out;
+ if (loc->parent) {
+ GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /* nameless lookup, fetch contributions to all parents */
+ GET_CONTRI_KEY (contri_key, NULL, ret);
}
ret = dict_set_int64 (dict, contri_key, 0);
@@ -323,21 +336,15 @@ mq_inode_ctx_new (inode_t * inode, xlator_t *this)
quota_local_t *
mq_local_new ()
{
- int32_t ret = -1;
quota_local_t *local = NULL;
- QUOTA_ALLOC (local, quota_local_t, ret);
- if (ret < 0)
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
goto out;
local->ref = 1;
- local->delta = 0;
- local->err = 0;
LOCK_INIT (&local->lock);
- memset (&local->loc, 0, sizeof (loc_t));
- memset (&local->parent_loc, 0, sizeof (loc_t));
-
local->ctx = NULL;
local->contri = NULL;
@@ -379,7 +386,7 @@ mq_local_unref (xlator_t *this, quota_local_t *local)
LOCK_DESTROY (&local->lock);
- GF_FREE (local);
+ mem_put (local);
out:
return 0;
}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
index 56080f0dc..6cdd14881 100644
--- a/xlators/features/marker/src/marker-quota-helper.h
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -1,20 +1,13 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+
#ifndef _MARKER_QUOTA_HELPER_H
#define _MARKER_QUOTA_HELPER
@@ -23,7 +16,7 @@
#include "config.h"
#endif
-#include "marker-quota.h"
+#include "marker.h"
#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
do { \
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
index 57e8470d6..a758e938f 100644
--- a/xlators/features/marker/src/marker-quota.c
+++ b/xlators/features/marker/src/marker-quota.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -39,7 +30,8 @@ mq_loc_copy (loc_t *dst, loc_t *src)
GF_VALIDATE_OR_GOTO ("marker", src, out);
if (src->inode == NULL ||
- src->path == NULL) {
+ ((src->parent == NULL) && (uuid_is_null (src->pargfid))
+ && !__is_root_gfid (src->inode->gfid))) {
gf_log ("marker", GF_LOG_WARNING,
"src loc is not valid");
goto out;
@@ -152,7 +144,7 @@ mq_assign_lk_owner (xlator_t *this, call_frame_t *frame)
}
UNLOCK (&conf->lock);
- frame->root->lk_owner = lk_owner;
+ set_lk_owner_from_uint64 (&frame->root->lk_owner, lk_owner);
return;
}
@@ -179,6 +171,7 @@ mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
}
newloc->parent = inode_ref (oldloc->inode);
+ uuid_copy (newloc->pargfid, oldloc->inode->gfid);
len = strlen (oldloc->path);
@@ -207,7 +200,7 @@ out:
int32_t
mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
@@ -216,7 +209,7 @@ mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
@@ -228,7 +221,7 @@ mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *thi
if (op_ret == -1) {
local->err = -1;
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -246,7 +239,7 @@ mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *thi
if (ret == -1) {
local->err = -1;
frame->local = NULL;
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -260,20 +253,21 @@ mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *thi
mq_dirty_inode_updation_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &loc, F_SETLKW, &lock);
+ this->name, &loc, F_SETLKW, &lock, NULL);
loc_wipe (&loc);
return 0;
out:
- mq_dirty_inode_updation_done (frame, NULL, this, -1, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, -1, 0, NULL);
return 0;
}
int32_t
mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
@@ -307,17 +301,21 @@ wind:
if (ret)
goto err;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
STACK_WIND (frame, mq_release_lock_on_dirty_inode,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0);
+ &local->loc, newdict, 0, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -373,9 +371,14 @@ mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
if (ret)
goto err;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, new_dict);
+ GF_XATTROP_ADD_ARRAY64, new_dict, NULL);
ret = 0;
@@ -383,7 +386,7 @@ err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (new_dict)
@@ -434,6 +437,11 @@ mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
if (ret)
goto err;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, dict);
ret =0;
@@ -442,7 +450,7 @@ err:
if (ret) {
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (dict)
@@ -480,7 +488,8 @@ mq_get_child_contribution (call_frame_t *frame,
val = -2;
if (!mq_test_and_set_local_err (local, &val) &&
val != -2)
- mq_release_lock_on_dirty_inode (local->frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (local->frame, NULL,
+ this, 0, 0, NULL);
goto exit;
}
@@ -508,7 +517,7 @@ out:
if (val == 0) {
mq_dirty_inode_readdir (local->frame, NULL, this,
- 0, 0, NULL);
+ 0, 0, NULL, NULL);
}
mq_local_unref (this, local);
@@ -524,7 +533,7 @@ mq_readdir_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
char contri_key [512] = {0, };
int32_t ret = 0;
@@ -544,7 +553,7 @@ mq_readdir_cbk (call_frame_t *frame,
"readdir failed %s", strerror (op_errno));
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
goto end;
} else if (op_ret == 0) {
@@ -666,7 +675,7 @@ mq_readdir_cbk (call_frame_t *frame,
}
if (ret && val != -2) {
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
end:
mq_local_unref (this, local);
@@ -680,7 +689,7 @@ mq_dirty_inode_readdir (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -688,7 +697,7 @@ mq_dirty_inode_readdir (call_frame_t *frame,
if (op_ret == -1) {
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -699,7 +708,7 @@ mq_dirty_inode_readdir (call_frame_t *frame,
mq_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- local->fd, READDIR_BUF, local->d_off);
+ local->fd, READDIR_BUF, local->d_off, xdata);
return 0;
}
@@ -739,7 +748,7 @@ mq_check_if_still_dirty (call_frame_t *frame,
//the inode is not dirty anymore
if (dirty == 0) {
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -748,18 +757,22 @@ mq_check_if_still_dirty (call_frame_t *frame,
local->d_off = 0;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
STACK_WIND(frame,
mq_dirty_inode_readdir,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- &local->loc, fd);
+ &local->loc, fd, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
}
if (fd != NULL) {
@@ -770,15 +783,15 @@ err:
}
int32_t
-mq_get_dirty_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_get_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
dict_t *xattr_req = NULL;
quota_local_t *local = NULL;
if (op_ret == -1) {
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0);
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -794,6 +807,11 @@ mq_get_dirty_xattr (call_frame_t *frame, void *cookie,
if (ret)
goto err;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame,
mq_check_if_still_dirty,
FIRST_CHILD(this),
@@ -805,7 +823,7 @@ mq_get_dirty_xattr (call_frame_t *frame, void *cookie,
err:
if (ret) {
local->err = -1;
- mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0);
+ mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0, NULL);
}
if (xattr_req)
@@ -872,7 +890,7 @@ mq_update_dirty_inode (xlator_t *this,
mq_get_dirty_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock);
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
return 1;
fr_destroy:
@@ -885,7 +903,7 @@ out:
int32_t
mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quota_local_t *local = NULL;
@@ -907,7 +925,7 @@ mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
struct gf_flock lock = {0, };
quota_local_t *local = NULL;
@@ -925,7 +943,7 @@ mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
}
@@ -933,7 +951,8 @@ mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
int32_t
mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
dict_t *newdict = NULL;
@@ -956,19 +975,22 @@ mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_xattr_creation_release_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0);
+ &local->loc, newdict, 0, NULL);
} else {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
ret = 0;
err:
if (ret < 0) {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict != NULL)
@@ -1017,7 +1039,11 @@ mq_create_xattr (xlator_t *this, call_frame_t *frame)
goto free_size;
}
- if (strcmp (local->loc.path, "/") != 0) {
+ if ((local->loc.path && strcmp (local->loc.path, "/") != 0)
+ || (local->loc.inode && !uuid_is_null (local->loc.inode->gfid) &&
+ !__is_root_gfid (local->loc.inode->gfid))
+ || (!uuid_is_null (local->loc.gfid)
+ && !__is_root_gfid (local->loc.gfid))) {
contri = mq_add_new_contribution_node (this, ctx, &local->loc);
if (contri == NULL)
goto err;
@@ -1030,9 +1056,11 @@ mq_create_xattr (xlator_t *this, call_frame_t *frame)
goto free_value;
}
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, dict);
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
ret = 0;
free_size:
@@ -1050,7 +1078,7 @@ err:
out:
if (ret < 0) {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
}
return 0;
@@ -1084,7 +1112,12 @@ mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
goto create_xattr;
//check contribution xattr if not root
- if (strcmp (local->loc.path, "/") != 0) {
+ if ((local->loc.path && strcmp (local->loc.path, "/") != 0)
+ || (!uuid_is_null (local->loc.gfid)
+ && !__is_root_gfid (local->loc.gfid))
+ || (local->loc.inode
+ && !uuid_is_null (local->loc.inode->gfid)
+ && !__is_root_gfid (local->loc.inode->gfid))) {
GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
if (ret < 0)
goto out;
@@ -1095,10 +1128,14 @@ mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
}
out:
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
return 0;
create_xattr:
+ if (uuid_is_null (local->loc.gfid)) {
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+ }
+
mq_create_xattr (this, frame);
return 0;
}
@@ -1106,7 +1143,7 @@ create_xattr:
int32_t
mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
dict_t *xattr_req = NULL;
quota_local_t *local = NULL;
@@ -1129,6 +1166,11 @@ mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req);
@@ -1137,14 +1179,14 @@ mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0);
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
if (xattr_req)
dict_unref (xattr_req);
return 0;
lock_err:
- mq_inode_creation_done (frame, NULL, this, 0, 0);
+ mq_inode_creation_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1186,7 +1228,7 @@ mq_set_inode_xattr (xlator_t *this, loc_t *loc)
mq_get_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock);
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
return 0;
@@ -1202,6 +1244,7 @@ mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
{
int32_t ret = -1;
quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
GF_VALIDATE_OR_GOTO ("marker", this, out);
GF_VALIDATE_OR_GOTO ("marker", local, out);
@@ -1231,7 +1274,7 @@ mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
if (ret < 0) {
gf_log_callingfn (this->name, GF_LOG_WARNING,
- "inode ctx get failed");
+ "inode ctx get failed");
goto out;
}
@@ -1245,7 +1288,31 @@ mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
goto out;
}
- local->contri = (inode_contribution_t *) ctx->contribution_head.next;
+ /* Earlier we used to get the next entry in the list maintained
+ by the context. In a good situation it works. i.e the next
+ parent in the directory hierarchy for this path is obtained.
+
+ But consider the below situation:
+ mount-point: /mnt/point
+ quota enabled directories within mount point: /a, /b, /c
+
+ Now when some file (file1) in the directory /c is written some data,
+ then to update the directories, marker has to get the contribution
+ object for the parent inode, i.e /c.
+ Beefore, it was being done by
+ local->contri = (inode_contribution_t *) ctx->contribution_head.next;
+ It works in the normal situations. But suppose /c is moved to /b.
+ Now /b's contribution object is added to the end of the list of
+ parents that the file file1 within /b/c is maintaining. Now if
+ the file /b/c/file1 is copied to /b/c/new, to update the parent in
+ the order c, b and / we cannot go to the next element in the list,
+ as in this case the next contribution object would be / and /b's
+ contribution will be at the end of the list. So get the proper
+ parent's contribution, by searching the entire list.
+ */
+ contribution = mq_get_contribution_node (local->loc.parent, ctx);
+ GF_ASSERT (contribution != NULL);
+ local->contri = contribution;
ret = 0;
out:
@@ -1259,7 +1326,7 @@ mq_xattr_updation_done (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
return 0;
@@ -1268,7 +1335,7 @@ mq_xattr_updation_done (call_frame_t *frame,
int32_t
mq_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
gf_boolean_t status = _gf_false;
@@ -1282,7 +1349,7 @@ mq_inodelk_cbk (call_frame_t *frame, void *cookie,
"unlocking failed on path (%s)(%s)",
local->parent_loc.path, strerror (op_errno));
}
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
return 0;
}
@@ -1292,11 +1359,12 @@ mq_inodelk_cbk (call_frame_t *frame, void *cookie,
if ((strcmp (local->parent_loc.path, "/") == 0)
|| (local->delta == 0)) {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
} else {
ret = mq_get_parent_inode_local (this, local);
if (ret < 0) {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
goto out;
}
status = _gf_true;
@@ -1305,7 +1373,8 @@ mq_inodelk_cbk (call_frame_t *frame, void *cookie,
if (ret == 0 && status == _gf_false) {
mq_get_lock_on_parent (frame, this);
} else {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL);
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
}
}
out:
@@ -1317,7 +1386,7 @@ out:
int32_t
mq_release_parent_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
quota_local_t *local = NULL;
@@ -1346,7 +1415,7 @@ mq_release_parent_lock (call_frame_t *frame, void *cookie,
UNLOCK (&ctx->lock);
if (local->parent_loc.inode == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"Invalid parent inode.");
goto err;
}
@@ -1363,12 +1432,12 @@ wind:
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->parent_loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
err:
mq_xattr_updation_done (frame, NULL, this,
- 0, 0 , NULL);
+ 0, 0 , NULL, NULL);
return 0;
}
@@ -1379,7 +1448,7 @@ mq_mark_undirty (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
@@ -1434,17 +1503,20 @@ mq_mark_undirty (call_frame_t *frame,
goto err;
}
+ uuid_copy (local->parent_loc.gfid, local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
STACK_WIND (frame, mq_release_parent_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, newdict, 0);
+ &local->parent_loc, newdict, 0, NULL);
ret = 0;
err:
if (op_ret == -1 || ret == -1) {
local->err = op_errno;
- mq_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1460,7 +1532,7 @@ mq_update_parent_size (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
int64_t *size = NULL;
int32_t ret = -1;
@@ -1484,9 +1556,10 @@ mq_update_parent_size (call_frame_t *frame,
}
UNLOCK (&local->contri->lock);
- gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
- local->loc.path, local->ctx->size,
- local->contri->contribution);
+ gf_log_callingfn (this->name, GF_LOG_DEBUG, "path: %s size: %"PRId64
+ " contribution:%"PRId64,
+ local->loc.path, local->ctx->size,
+ local->contri->contribution);
if (dict == NULL) {
op_errno = EINVAL;
@@ -1516,18 +1589,23 @@ mq_update_parent_size (call_frame_t *frame,
goto err;
}
+ if (uuid_is_null (local->parent_loc.gfid))
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
STACK_WIND (frame,
mq_mark_undirty,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
&local->parent_loc,
GF_XATTROP_ADD_ARRAY64,
- newdict);
+ newdict, NULL);
ret = 0;
err:
if (op_ret == -1 || ret < 0) {
local->err = op_errno;
- mq_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1609,13 +1687,13 @@ unlock:
}
UNLOCK (&contribution->lock);
- gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
+ gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 " %"PRId64,
local->loc.path, size_int, contri_int);
local->delta = size_int - contri_int;
if (local->delta == 0) {
- mq_mark_undirty (frame, NULL, this, 0, 0, NULL);
+ mq_mark_undirty (frame, NULL, this, 0, 0, NULL, NULL);
return 0;
}
@@ -1637,20 +1715,25 @@ unlock:
goto err;
}
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame,
mq_update_parent_size,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
&local->loc,
GF_XATTROP_ADD_ARRAY64,
- newdict);
+ newdict, NULL);
ret = 0;
err:
if (op_ret == -1 || ret < 0) {
local->err = op_errno;
- mq_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1662,7 +1745,7 @@ err:
int32_t
mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
char contri_key [512] = {0, };
@@ -1683,7 +1766,8 @@ mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
VALIDATE_OR_GOTO (local->ctx, err);
VALIDATE_OR_GOTO (local->contri, err);
- gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path);
+ gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty",
+ local->parent_loc.path);
//update parent ctx
ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
@@ -1728,6 +1812,11 @@ mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
mq_set_ctx_updation_status (local->ctx, _gf_false);
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, &local->loc, newdict);
@@ -1739,7 +1828,7 @@ err:
mq_set_ctx_updation_status (local->ctx, _gf_false);
- mq_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (newdict)
@@ -1750,7 +1839,7 @@ err:
int32_t
mq_markdirty (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
dict_t *dict = NULL;
@@ -1767,7 +1856,7 @@ mq_markdirty (call_frame_t *frame, void *cookie,
mq_set_ctx_updation_status (local->ctx, _gf_false);
- mq_inodelk_cbk (frame, NULL, this, 0, 0);
+ mq_inodelk_cbk (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1785,10 +1874,14 @@ mq_markdirty (call_frame_t *frame, void *cookie,
if (ret == -1)
goto err;
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
STACK_WIND (frame, mq_fetch_child_size_and_contri,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, dict, 0);
+ &local->parent_loc, dict, 0, NULL);
ret = 0;
err:
@@ -1797,7 +1890,7 @@ err:
mq_set_ctx_updation_status (local->ctx, _gf_false);
- mq_release_parent_lock (frame, NULL, this, 0, 0);
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
}
if (dict)
@@ -1835,7 +1928,7 @@ mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this)
mq_markdirty,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
return 0;
@@ -1845,15 +1938,18 @@ fr_destroy:
return -1;
}
-
int
-mq_start_quota_txn (xlator_t *this, loc_t *loc,
- quota_inode_ctx_t *ctx,
- inode_contribution_t *contri)
+mq_prepare_txn_frame (xlator_t *this, loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri,
+ call_frame_t **new_frame)
{
- int32_t ret = -1;
- call_frame_t *frame = NULL;
- quota_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ quota_local_t *local = NULL;
+
+ if (!this || !loc || !new_frame)
+ goto err;
frame = create_frame (this, this->ctx->pool);
if (frame == NULL)
@@ -1879,14 +1975,36 @@ mq_start_quota_txn (xlator_t *this, loc_t *loc,
local->ctx = ctx;
local->contri = contri;
+ ret = 0;
+ *new_frame = frame;
+
+ return ret;
+
+fr_destroy:
+ QUOTA_STACK_DESTROY (frame, this);
+err:
+ return ret;
+}
+
+int
+mq_start_quota_txn (xlator_t *this, loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri)
+{
+ int32_t ret = -1;
+ call_frame_t *frame = NULL;
+
+ ret = mq_prepare_txn_frame (this, loc, ctx,
+ contri, &frame);
+ if (ret)
+ goto err;
+
ret = mq_get_lock_on_parent (frame, this);
if (ret == -1)
goto err;
return 0;
-fr_destroy:
- QUOTA_STACK_DESTROY (frame, this);
err:
mq_set_ctx_updation_status (ctx, _gf_false);
@@ -1914,11 +2032,46 @@ mq_initiate_quota_txn (xlator_t *this, loc_t *loc)
goto out;
}
+ /* Create the contribution node if its absent. Is it right to
+ assume that if the contribution node is not there, then
+ create one and proceed instead of returning?
+ Reason for this assumption is for hard links. Suppose
+ hard link for a file f1 present in a directory d1 is
+ created in the directory d2 (as f2). Now, since d2's
+ contribution is not there in f1's inode ctx, d2's
+ contribution xattr wont be created and will create problems
+ for quota operations.
+ */
contribution = mq_get_contribution_node (loc->parent, ctx);
- if (contribution == NULL)
- goto out;
+ if (!contribution) {
+ if ((loc->path && strcmp (loc->path, "/"))
+ || (!uuid_is_null (loc->gfid)
+ && !__is_root_gfid (loc->gfid))
+ || (loc->inode && !uuid_is_null (loc->inode->gfid)
+ && !__is_root_gfid (loc->inode->gfid)))
+ gf_log_callingfn (this->name, GF_LOG_TRACE,
+ "contribution node for the "
+ "path (%s) with parent (%s) "
+ "not found", loc->path,
+ loc->parent?
+ uuid_utoa (loc->parent->gfid):
+ NULL);
- /* To improve performance, donot start another transaction
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
+ if (!contribution) {
+ if(loc->path && strcmp (loc->path, "/"))
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "could not allocate "
+ " contribution node for (%s) "
+ "parent: (%s)", loc->path,
+ loc->parent?
+ uuid_utoa (loc->parent->gfid):
+ NULL);
+ goto out;
+ }
+ }
+
+ /* To improve performance, do not start another transaction
* if one is already in progress for same inode
*/
status = _gf_true;
@@ -1937,16 +2090,7 @@ out:
}
-/* int32_t */
-/* validate_inode_size_contribution (xlator_t *this, loc_t *loc, int64_t size, */
-/* int64_t contribution) */
-/* { */
-/* if (size != contribution) { */
-/* mq_initiate_quota_txn (this, loc); */
-/* } */
-/* return 0; */
-/* } */
int32_t
@@ -1975,11 +2119,13 @@ mq_inspect_directory_xattr (xlator_t *this,
}
}
- if (strcmp (loc->path, "/") != 0) {
+ if (!loc->path || (loc->path && strcmp (loc->path, "/") != 0)) {
contribution = mq_add_new_contribution_node (this, ctx, loc);
if (contribution == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot add a new contribution node");
+ if (!uuid_is_null (loc->inode->gfid))
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot add a new contribution node "
+ "(%s)", uuid_utoa (loc->inode->gfid));
ret = -1;
goto err;
}
@@ -1993,7 +2139,10 @@ mq_inspect_directory_xattr (xlator_t *this,
if (ret < 0)
goto out;
- if (strcmp (loc->path, "/") != 0) {
+ if ((loc->path && strcmp (loc->path, "/") != 0)
+ || (!uuid_is_null (loc->gfid) && !__is_root_gfid (loc->gfid))
+ || (loc->inode && !uuid_is_null (loc->inode->gfid) &&
+ !__is_root_gfid (loc->inode->gfid))) {
not_root = _gf_true;
GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
@@ -2065,8 +2214,11 @@ mq_inspect_file_xattr (xlator_t *this,
}
contribution = mq_add_new_contribution_node (this, ctx, loc);
- if (contribution == NULL)
+ if (contribution == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_DEBUG, "cannot allocate "
+ "contribution node (path:%s)", loc->path);
goto out;
+ }
LOCK (&ctx->lock);
{
@@ -2098,8 +2250,12 @@ mq_inspect_file_xattr (xlator_t *this,
if (size != contri_int) {
mq_initiate_quota_txn (this, loc);
}
- } else
- mq_initiate_quota_txn (this, loc);
+ } else {
+ if (size)
+ mq_initiate_quota_txn (this, loc);
+ else
+ mq_set_inode_xattr (this, loc);
+ }
}
out:
@@ -2112,8 +2268,8 @@ mq_xattr_state (xlator_t *this,
dict_t *dict,
struct iatt buf)
{
- if (buf.ia_type == IA_IFREG ||
- buf.ia_type == IA_IFLNK) {
+ if (((buf.ia_type == IA_IFREG) && !dht_is_linkfile (&buf, dict))
+ || (buf.ia_type == IA_IFLNK)) {
mq_inspect_file_xattr (this, loc, dict, buf);
} else if (buf.ia_type == IA_IFDIR)
mq_inspect_directory_xattr (this, loc, dict, buf);
@@ -2129,11 +2285,13 @@ mq_req_xattr (xlator_t *this,
int32_t ret = -1;
GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
GF_VALIDATE_OR_GOTO ("marker", dict, out);
+ if (!loc)
+ goto set_size;
+
//if not "/" then request contribution
- if (strcmp (loc->path, "/") == 0)
+ if (loc->path && strcmp (loc->path, "/") == 0)
goto set_size;
ret = mq_dict_set_contribution (this, dict, loc);
@@ -2162,7 +2320,7 @@ out:
int32_t
mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
QUOTA_STACK_DESTROY (frame, this);
@@ -2171,32 +2329,105 @@ mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
char contri_key [512] = {0, };
quota_local_t *local = NULL;
+ inode_t *inode = NULL;
+ dentry_t *tmp = NULL;
+ gf_boolean_t last_dentry = _gf_true;
+ loc_t loc = {0, };
+ dentry_t *other_dentry = NULL;
+ gf_boolean_t remove = _gf_false;
local = (quota_local_t *) frame->local;
if (op_ret == -1 || local->err == -1) {
- mq_removexattr_cbk (frame, NULL, this, -1, 0);
+ mq_removexattr_cbk (frame, NULL, this, -1, 0, NULL);
return 0;
}
frame->local = NULL;
- if (local->hl_count > 1) {
- GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
+ GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
- STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- &local->loc, contri_key);
- ret = 0;
- } else {
- mq_removexattr_cbk (frame, NULL, this, 0, 0);
+ if (!local->loc.inode)
+ inode = inode_grep (local->loc.parent->table, local->loc.parent,
+ local->loc.name);
+ else
+ inode = inode_ref (local->loc.inode);
+
+ /* Suppose there are 2 directories dir1 and dir2. Quota limit is set on
+ both the directories. There is a file (f1) in dir1. A hark link is
+ created for that file inside the directory dir2 (say f2). Now one
+ more xattr is set in the inode as a new hard link is created in a
+ separate directory.
+ i.e trusted.glusterfs.quota.<gfid of dir2>.contri=<contribution>
+
+ Now when the hardlink f2 is removed, then the new xattr added (i.e
+ the xattr indicating its contribution to ITS parent directory) should
+ be removed (IFF there is not another hardlink for that file in the
+ same directory).
+
+ To do that upon getting unlink first check whether any other hard
+ links for the same inode exists in the same directory. If so do not
+ do anything and proceed for quota transaction.
+ Otherwise, if the removed entry was the only link for that inode
+ within that directory, then get another dentry for the inode
+ (by traversing the list of dentries for the inode) and using the
+ the dentry's parent and name, send removexattr so that the xattr
+ is removed.
+
+ If it is not done, then if the volume is restarted or the brick
+ process is restarted, then wrong quota usage will be shown for the
+ directory dir2.
+ */
+ if (inode) {
+ tmp = NULL;
+ list_for_each_entry (tmp, &inode->dentry_list, inode_list) {
+ if (local->loc.parent == tmp->parent) {
+ if (strcmp (local->loc.name, local->loc.name)) {
+ last_dentry = _gf_false;
+ break;
+ }
+ }
+ }
+ remove = last_dentry;
}
+ if (remove) {
+ if (!other_dentry) {
+ list_for_each_entry (tmp, &inode->dentry_list,
+ inode_list) {
+ if (local->loc.parent != tmp->parent) {
+ other_dentry = tmp;
+ break;
+ }
+ }
+ }
+
+ if (!other_dentry)
+ mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL);
+ else {
+ loc.parent = inode_ref (other_dentry->parent);
+ loc.name = gf_strdup (other_dentry->name);
+ uuid_copy (loc.pargfid , other_dentry->parent->gfid);
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+ inode_path (other_dentry->parent, other_dentry->name,
+ (char **)&loc.path);
+
+ STACK_WIND (frame, mq_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ &loc, contri_key, NULL);
+ }
+ } else
+ mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL);
+
+ ret = 0;
+
if (strcmp (local->parent_loc.path, "/") != 0) {
ret = mq_get_parent_inode_local (this, local);
if (ret < 0)
@@ -2207,12 +2438,15 @@ _mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
out:
mq_local_unref (this, local);
+ loc_wipe (&loc);
+ inode_unref (inode);
return 0;
}
int32_t
mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
int32_t ret = -1;
struct gf_flock lock = {0, };
@@ -2259,13 +2493,13 @@ mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, &local->parent_loc,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
}
int32_t
-mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = -1;
int64_t *size = NULL;
@@ -2296,16 +2530,19 @@ mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie,
if (ret < 0)
goto err;
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, &local->parent_loc,
- GF_XATTROP_ADD_ARRAY64, dict);
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
dict_unref (dict);
return 0;
err:
local->err = 1;
- mq_inode_remove_done (frame, NULL, this, -1, 0, NULL);
+ mq_inode_remove_done (frame, NULL, this, -1, 0, NULL, NULL);
if (dict)
dict_unref (dict);
return 0;
@@ -2329,8 +2566,11 @@ mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
goto out;
contribution = mq_get_contribution_node (loc->parent, ctx);
- if (contribution == NULL)
+ if (contribution == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING, "contribution for"
+ " the node %s is NULL", loc->path);
goto out;
+ }
local = mq_local_new ();
if (local == NULL) {
@@ -2349,6 +2589,8 @@ mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
}
if (local->size == 0) {
+ gf_log_callingfn (this->name, GF_LOG_TRACE,
+ "local->size is 0 " "path: (%s)", loc->path);
ret = 0;
goto out;
}
@@ -2361,8 +2603,12 @@ mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
local->contri = contribution;
ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc);
- if (ret < 0)
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_INFO, "building parent loc"
+ " failed. (gfid: %s)",
+ uuid_utoa (loc->parent->gfid));
goto out;
+ }
frame = create_frame (this, this->ctx->pool);
if (!frame) {
@@ -2381,7 +2627,7 @@ mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
if (local->parent_loc.inode == NULL) {
ret = -1;
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"Inode is NULL, so can't stackwind.");
goto out;
}
@@ -2390,7 +2636,7 @@ mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
mq_reduce_parent_size_xattr,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
local = NULL;
ret = 0;
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
index 7a90b28b7..42def9d22 100644
--- a/xlators/features/marker/src/marker-quota.h
+++ b/xlators/features/marker/src/marker-quota.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_QUOTA_H
#define _MARKER_QUOTA_H
@@ -24,7 +15,6 @@
#include "config.h"
#endif
-#include "marker.h"
#include "xlator.h"
#include "marker-mem-types.h"
@@ -52,8 +42,6 @@
var = GF_CALLOC (sizeof (type), 1, \
gf_marker_mt_##type); \
if (!var) { \
- gf_log ("", GF_LOG_ERROR, \
- "out of memory"); \
ret = -1; \
} \
} while (0);
@@ -71,13 +59,20 @@
ret = 0; \
} while (0);
-#define GET_CONTRI_KEY(var, _gfid, _ret) \
- do { \
- char _gfid_unparsed[40]; \
- uuid_unparse (_gfid, _gfid_unparsed); \
- _ret = snprintf (var, CONTRI_KEY_MAX, QUOTA_XATTR_PREFIX \
- ".%s.%s." CONTRIBUTION, "quota", \
- _gfid_unparsed); \
+#define GET_CONTRI_KEY(var, _gfid, _ret) \
+ do { \
+ if (_gfid != NULL) { \
+ char _gfid_unparsed[40]; \
+ uuid_unparse (_gfid, _gfid_unparsed); \
+ _ret = snprintf (var, CONTRI_KEY_MAX, \
+ QUOTA_XATTR_PREFIX \
+ ".%s.%s." CONTRIBUTION, "quota", \
+ _gfid_unparsed); \
+ } else { \
+ _ret = snprintf (var, CONTRI_KEY_MAX, \
+ QUOTA_XATTR_PREFIX \
+ ".%s.." CONTRIBUTION, "quota"); \
+ } \
} while (0);
#define QUOTA_SAFE_INCREMENT(lock, var) \
@@ -104,28 +99,6 @@ struct inode_contribution {
};
typedef struct inode_contribution inode_contribution_t;
-struct quota_local {
- int64_t delta;
- int64_t d_off;
- int32_t err;
- int32_t ref;
- int64_t sum;
- int64_t size;
- int32_t hl_count;
- int32_t dentry_child_count;
-
- fd_t *fd;
- call_frame_t *frame;
- gf_lock_t lock;
-
- loc_t loc;
- loc_t parent_loc;
-
- quota_inode_ctx_t *ctx;
- inode_contribution_t *contri;
-};
-typedef struct quota_local quota_local_t;
-
int32_t
mq_get_lock_on_parent (call_frame_t *, xlator_t *);
@@ -146,7 +119,7 @@ mq_initiate_quota_txn (xlator_t *, loc_t *);
int32_t
mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
- int32_t, int32_t, fd_t *);
+ int32_t, int32_t, fd_t *, dict_t *);
int32_t
mq_reduce_parent_size (xlator_t *, loc_t *, int64_t);
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index c40ef7245..a27a266f0 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -30,6 +21,7 @@
#include "marker-quota-helper.h"
#include "marker-common.h"
#include "byte-order.h"
+#include "syncop.h"
#define _GF_UID_GID_CHANGED 1
@@ -65,22 +57,25 @@ marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
if (inode) {
loc->inode = inode_ref (inode);
+ if (uuid_is_null (loc->gfid)) {
+ uuid_copy (loc->gfid, loc->inode->gfid);
+ }
}
if (parent)
loc->parent = inode_ref (parent);
- loc->path = gf_strdup (path);
- if (!loc->path) {
- gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
- goto loc_wipe;
- }
+ if (path) {
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
+ }
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else
- goto loc_wipe;
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
ret = 0;
loc_wipe:
@@ -94,23 +89,14 @@ int
marker_inode_loc_fill (inode_t *inode, loc_t *loc)
{
char *resolvedpath = NULL;
- inode_t *parent = NULL;
int ret = -1;
+ inode_t *parent = NULL;
if ((!inode) || (!loc))
return ret;
- if ((inode) && __is_root_gfid (inode->gfid)) {
- loc->parent = NULL;
- goto ignore_parent;
- }
+ parent = inode_parent (inode, NULL, NULL);
- parent = inode_parent (inode, 0, NULL);
- if (!parent) {
- goto err;
- }
-
-ignore_parent:
ret = inode_path (inode, NULL, &resolvedpath);
if (ret < 0)
goto err;
@@ -120,11 +106,10 @@ ignore_parent:
goto err;
err:
- if (parent)
- inode_unref (parent);
+ if (parent)
+ inode_unref (parent);
- if (resolvedpath)
- GF_FREE (resolvedpath);
+ GF_FREE (resolvedpath);
return ret;
}
@@ -134,8 +119,17 @@ marker_trav_parent (marker_local_t *local)
{
int32_t ret = 0;
loc_t loc = {0, };
+ inode_t *parent = NULL;
+ int8_t need_unref = 0;
- ret = marker_inode_loc_fill (local->loc.parent, &loc);
+ if (!local->loc.parent) {
+ parent = inode_parent (local->loc.inode, NULL, NULL);
+ if (parent)
+ need_unref = 1;
+ } else
+ parent = local->loc.parent;
+
+ ret = marker_inode_loc_fill (parent, &loc);
if (ret < 0) {
ret = -1;
@@ -146,16 +140,28 @@ marker_trav_parent (marker_local_t *local)
local->loc = loc;
out:
+ if (need_unref)
+ inode_unref (parent);
+
return ret;
}
int32_t
-marker_error_handler (xlator_t *this)
+marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno)
{
- marker_conf_t *priv = NULL;
+ marker_conf_t *priv = NULL;
+ const char *path = NULL;
- priv = (marker_conf_t *) this->private;
+ priv = (marker_conf_t *) this->private;
+ path = local
+ ? (local->loc.path
+ ? local->loc.path : uuid_utoa(local->loc.gfid))
+ : "<nul>";
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Indexing gone corrupt at %s (reason: %s)."
+ " Geo-replication slave content needs to be revalidated",
+ path, strerror (op_errno));
unlink (priv->timestamp_file);
return 0;
@@ -180,12 +186,14 @@ marker_local_unref (marker_local_t *local)
loc_wipe (&local->loc);
loc_wipe (&local->parent_loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
if (local->oplocal) {
marker_local_unref (local->oplocal);
local->oplocal = NULL;
}
- GF_FREE (local);
+ mem_put (local);
out:
return 0;
}
@@ -220,13 +228,14 @@ stat_stampfile (xlator_t *this, marker_conf_t *priv,
int32_t
marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
- const char *name, struct volume_mark *vol_mark)
+ const char *name, struct volume_mark *vol_mark,
+ dict_t *xdata)
{
int32_t ret = -1;
dict_t *dict = NULL;
if (vol_mark == NULL){
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
goto out;
}
@@ -239,7 +248,7 @@ marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_WARNING, "failed to set key %s",
name);
- STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata);
dict_unref (dict);
out:
@@ -263,42 +272,105 @@ call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
stat_stampfile (this, priv, &vol_mark);
- marker_getxattr_stampfile_cbk (frame, this, name, vol_mark);
+ marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL);
out:
return ret;
}
int32_t
marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
+ int ret = 0;
+ char *src = NULL;
+ char *dst = NULL;
+ int len = 0;
+ marker_local_t *local = NULL;
+
+ local = frame->local;
+
+
if (cookie) {
gf_log (this->name, GF_LOG_DEBUG,
"Filtering the quota extended attributes");
- dict_foreach (dict, marker_filter_quota_xattr, NULL);
+ /* If the getxattr is from a non special client, then do not
+ copy the quota related xattrs (except the quota limit key
+ i.e trusted.glusterfs.quota.limit-set which has been set by
+ glusterd on the directory on which quota limit is set.) for
+ directories. Let the healing of xattrs happen upon lookup.
+ NOTE: setting of trusted.glusterfs.quota.limit-set as of now
+ happens from glusterd. It should be moved to quotad. Also
+ trusted.glusterfs.quota.limit-set is set on directory which
+ is permanent till quota is removed on that directory or limit
+ is changed. So let that xattr be healed by other xlators
+ properly whenever directory healing is done.
+ */
+ ret = dict_get_ptr_and_len (dict, QUOTA_LIMIT_KEY,
+ (void **)&src, &len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "dict_get on %s "
+ "failed", QUOTA_LIMIT_KEY);
+ } else {
+ dst = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+ if (dst)
+ memcpy (dst, src, len);
+ }
+
+ /*
+ * Except limit-set xattr, rest of the xattrs are maintained
+ * by quota xlator. Don't expose them to other xlators.
+ * This filter makes sure quota xattrs are not healed as part of
+ * metadata self-heal
+ */
+ GF_REMOVE_INTERNAL_XATTR ("trusted.glusterfs.quota*", dict);
+ if (!ret && IA_ISDIR (local->loc.inode->ia_type) && dst) {
+ ret = dict_set_dynptr (dict, QUOTA_LIMIT_KEY,
+ dst, len);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "setting "
+ "key %s failed", QUOTA_LIMIT_KEY);
+ else
+ dst = NULL;
+ }
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+
+ GF_FREE (dst);
+
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ marker_local_unref (local);
return 0;
}
int32_t
marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
gf_boolean_t ret = _gf_false;
marker_conf_t *priv = NULL;
unsigned long cookie = 0;
+ marker_local_t *local = NULL;
priv = this->private;
- if (priv == NULL || (priv->feature_enabled & GF_XTIME) == 0)
- goto wind;
+ frame->local = mem_get0 (this->local_pool);
+ local = frame->local;
+ if (local == NULL)
+ goto out;
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0)
+ goto out;
gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid);
- ret = call_from_special_client (frame, this, name);
-wind:
+ if (priv && priv->feature_enabled & GF_XTIME)
+ ret = call_from_special_client (frame, this, name);
+
if (ret == _gf_false) {
if (name == NULL) {
/* Signifies that marker translator
@@ -311,10 +383,15 @@ wind:
STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr, loc,
- name);
+ name, xdata);
}
return 0;
+out:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ marker_local_unref (local);
+ return 0;
}
@@ -336,7 +413,7 @@ marker_setxattr_done (call_frame_t *frame)
int
marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
int32_t done = 0;
@@ -345,14 +422,20 @@ marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = (marker_local_t*) frame->local;
if (op_ret == -1 && op_errno == ENOSPC) {
- marker_error_handler (this);
+ marker_error_handler (this, local, op_errno);
done = 1;
goto out;
}
- if (strcmp (local->loc.path, "/") == 0) {
- done = 1;
- goto out;
+ if (local) {
+ if (local->loc.path && strcmp (local->loc.path, "/") == 0) {
+ done = 1;
+ goto out;
+ }
+ if (__is_root_gfid (local->loc.gfid)) {
+ done = 1;
+ goto out;
+ }
}
ret = marker_trav_parent (local);
@@ -379,7 +462,7 @@ out:
int32_t
marker_start_setxattr (call_frame_t *frame, xlator_t *this)
{
- int32_t ret = 0;
+ int32_t ret = -1;
dict_t *dict = NULL;
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -388,20 +471,37 @@ marker_start_setxattr (call_frame_t *frame, xlator_t *this)
local = (marker_local_t*) frame->local;
+ if (!local)
+ goto out;
+
dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (local->loc.inode && uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
ret = dict_set_static_bin (dict, priv->marker_xattr,
(void *)local->timebuf, 8);
- if (ret)
+ if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"failed to set marker xattr (%s)", local->loc.path);
+ goto out;
+ }
STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0);
+ FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0,
+ NULL);
- dict_unref (dict);
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return ret;
}
void
@@ -434,12 +534,24 @@ marker_create_frame (xlator_t *this, marker_local_t *local)
int32_t
marker_xtime_update_marks (xlator_t *this, marker_local_t *local)
{
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ priv = this->private;
+
+ if ((local->pid == GF_CLIENT_PID_GSYNCD
+ && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE))
+ || (local->pid == GF_CLIENT_PID_DEFRAG))
+ goto out;
+
marker_gettimeofday (local);
marker_local_ref (local);
marker_create_frame (this, local);
-
+out:
return 0;
}
@@ -448,7 +560,7 @@ int32_t
marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -463,11 +575,14 @@ marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
@@ -484,7 +599,7 @@ out:
int
marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -495,7 +610,7 @@ marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -505,12 +620,12 @@ marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
wind:
STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, params);
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
err:
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -519,7 +634,7 @@ int32_t
marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -534,11 +649,14 @@ marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
@@ -555,7 +673,7 @@ out:
int32_t
marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -566,7 +684,7 @@ marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -576,12 +694,12 @@ marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
wind:
STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd,
- params);
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
return 0;
err:
STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
}
@@ -590,7 +708,7 @@ err:
int32_t
marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -604,7 +722,8 @@ marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -629,8 +748,8 @@ marker_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t offset,
- struct iobref *iobref)
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -641,7 +760,7 @@ marker_writev (call_frame_t *frame,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -652,10 +771,10 @@ marker_writev (call_frame_t *frame,
wind:
STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ flags, iobref, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -664,7 +783,7 @@ err:
int32_t
marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -679,7 +798,7 @@ marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -698,7 +817,8 @@ out:
}
int32_t
-marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -709,7 +829,7 @@ marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -719,10 +839,10 @@ marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
goto err;
wind:
STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags);
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -731,7 +851,7 @@ err:
int32_t
marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -746,15 +866,17 @@ marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
priv = this->private;
- if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1))
- mq_reduce_parent_size (this, &local->loc, -1);
+ if (priv->feature_enabled & GF_QUOTA) {
+ if (!local->skip_txn)
+ mq_reduce_parent_size (this, &local->loc, -1);
+ }
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -766,36 +888,8 @@ out:
int32_t
-marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
-{
- marker_local_t *local = NULL;
-
- local = frame->local;
- if (op_ret < 0) {
- goto err;
- }
-
- if (local == NULL) {
- op_errno = EINVAL;
- goto err;
- }
-
- local->ia_nlink = buf->ia_nlink;
-
- STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc);
- return 0;
-err:
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL);
- marker_local_unref (local);
- return 0;
-}
-
-
-int32_t
-marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -806,8 +900,10 @@ marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (priv->feature_enabled == 0)
goto unlink_wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
-
+ local = mem_get0 (this->local_pool);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
MARKER_INIT_LOCAL (frame, local);
ret = loc_copy (&local->loc, loc);
@@ -815,17 +911,18 @@ marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
if (ret == -1)
goto err;
- STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- return 0;
+ if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) {
+ local->skip_txn = 1;
+ goto unlink_wind;
+ }
unlink_wind:
STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
return 0;
err:
frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
marker_local_unref (local);
return 0;
}
@@ -835,7 +932,7 @@ int32_t
marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -850,15 +947,18 @@ marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
priv = this->private;
- if (priv->feature_enabled & GF_QUOTA)
- mq_initiate_quota_txn (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA) {
+ if (!local->skip_txn)
+ mq_set_inode_xattr (this, &local->loc);
+ }
+
if (priv->feature_enabled & GF_XTIME)
marker_xtime_update_marks (this, local);
@@ -869,7 +969,8 @@ out:
}
int32_t
-marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -880,7 +981,7 @@ marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -888,12 +989,16 @@ marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
if (ret == -1)
goto err;
+
+ if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY))
+ local->skip_txn = 1;
wind:
STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -901,7 +1006,7 @@ err:
int32_t
marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
loc_t newloc = {0, };
@@ -916,7 +1021,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0) {
if (local->err == 0) {
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
}
gf_log (this->name, GF_LOG_WARNING,
@@ -931,7 +1036,12 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
local->stub = NULL;
} else if (local->err != 0) {
STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "continuation stub to unwind the call is absent, hence "
+ "call will be hung (call-stack id = %"PRIu64")",
+ frame->root->unique);
}
mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
@@ -947,12 +1057,13 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
newloc.name++;
newloc.parent = inode_ref (local->loc.parent);
- mq_rename_update_newpath (this, &newloc);
+ mq_set_inode_xattr (this, &newloc);
loc_wipe (&newloc);
if (priv->feature_enabled & GF_XTIME) {
//update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
marker_xtime_update_marks (this, oplocal);
marker_xtime_update_marks (this, local);
}
@@ -966,7 +1077,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
struct gf_flock lock = {0, };
@@ -976,7 +1087,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
if (op_ret < 0) {
if (local->err == 0) {
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
}
gf_log (this->name, GF_LOG_WARNING,
@@ -987,7 +1098,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
}
if (local->next_lock_on == NULL) {
- marker_rename_done (frame, NULL, this, 0, 0);
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
goto out;
}
@@ -1001,7 +1112,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
marker_rename_done,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock);
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
out:
return 0;
@@ -1011,7 +1122,7 @@ out:
int32_t
marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
struct gf_flock lock = {0, };
@@ -1037,7 +1148,7 @@ marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
marker_rename_release_newp_lock,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- this->name, &oplocal->parent_loc, F_SETLKW, &lock);
+ this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL);
return 0;
}
@@ -1046,7 +1157,8 @@ int32_t
marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -1081,7 +1193,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret,
op_errno, buf, preoldparent,
postoldparent, prenewparent,
- postnewparent);
+ postnewparent, xdata);
if (stub == NULL) {
local->err = ENOMEM;
goto quota_err;
@@ -1106,11 +1218,12 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (newloc.name)
newloc.name++;
newloc.parent = inode_ref (local->loc.parent);
+ uuid_copy (newloc.gfid, oplocal->loc.inode->gfid);
STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock,
frame->cookie, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- &newloc, contri_key);
+ &newloc, contri_key, NULL);
loc_wipe (&newloc);
} else {
@@ -1118,7 +1231,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
preoldparent, postoldparent, prenewparent,
- postnewparent);
+ postnewparent, xdata);
if ((op_ret < 0) || (local == NULL)) {
goto out;
@@ -1126,6 +1239,7 @@ marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (priv->feature_enabled & GF_XTIME) {
//update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
marker_xtime_update_marks (this, oplocal);
marker_xtime_update_marks (this, local);
}
@@ -1140,15 +1254,14 @@ out:
return 0;
quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
int32_t
marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1163,7 +1276,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
MARKER_RESET_UID_GID (frame, frame->root, local);
if ((op_ret < 0) && (op_errno != ENOATTR)) {
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
gf_log (this->name, GF_LOG_WARNING,
"fetching contribution values from %s (gfid:%s) "
"failed (%s)", local->loc.path,
@@ -1175,7 +1288,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->loc.inode != NULL) {
GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
if (ret < 0) {
- local->err = errno;
+ local->err = errno ? errno : ENOMEM;
goto err;
}
@@ -1187,12 +1300,12 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename, &oplocal->loc,
- &local->loc);
+ &local->loc, NULL);
return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1200,7 +1313,7 @@ err:
int32_t
marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict)
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1215,7 +1328,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
MARKER_RESET_UID_GID (frame, frame->root, local);
if ((op_ret < 0) && (op_errno != ENOATTR)) {
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
gf_log (this->name, GF_LOG_WARNING,
"fetching contribution values from %s (gfid:%s) "
"failed (%s)", oplocal->loc.path,
@@ -1226,7 +1339,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
if (ret < 0) {
- local->err = errno;
+ local->err = errno ? errno : ENOMEM;
goto err;
}
@@ -1236,7 +1349,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
if (local->loc.inode != NULL) {
GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
if (ret < 0) {
- local->err = errno;
+ local->err = errno ? errno : ENOMEM;
goto err;
}
@@ -1244,18 +1357,22 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
* reset them in the callback.
*/
MARKER_SET_UID_GID (frame, local, frame->root);
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
STACK_WIND_COOKIE (frame, marker_do_rename,
frame->cookie, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- &local->loc, contri_key);
+ &local->loc, contri_key, NULL);
} else {
- marker_do_rename (frame, NULL, this, 0, 0, NULL);
+ marker_do_rename (frame, NULL, this, 0, 0, NULL, NULL);
}
return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
}
@@ -1263,7 +1380,7 @@ err:
int32_t
marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
char contri_key[512] = {0, };
@@ -1273,7 +1390,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
oplocal = local->oplocal;
if (op_ret < 0) {
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
gf_log (this->name, GF_LOG_WARNING,
"cannot hold inodelk on %s (gfid:%s) (%s)",
local->next_lock_on->path,
@@ -1284,7 +1401,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
if (ret < 0) {
- local->err = errno;
+ local->err = errno ? errno : ENOMEM;
goto quota_err;
}
@@ -1293,23 +1410,29 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
*/
MARKER_SET_UID_GID (frame, local, frame->root);
+ if (uuid_is_null (oplocal->loc.gfid))
+ uuid_copy (oplocal->loc.gfid,
+ oplocal->loc.inode->gfid);
+
+ GF_UUID_ASSERT (oplocal->loc.gfid);
+
STACK_WIND_COOKIE (frame, marker_get_newpath_contribution,
frame->cookie, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- &oplocal->loc, contri_key);
+ &oplocal->loc, contri_key, NULL);
return 0;
quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
return 0;
lock_err:
if ((local->next_lock_on == NULL)
|| (local->next_lock_on == &local->parent_loc)) {
local->next_lock_on = NULL;
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
} else {
- marker_rename_release_newp_lock (frame, NULL, this, 0, 0);
+ marker_rename_release_newp_lock (frame, NULL, this, 0, 0, NULL);
}
return 0;
@@ -1318,7 +1441,7 @@ lock_err:
int32_t
marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL, *oplocal = NULL;
loc_t *loc = NULL;
@@ -1334,7 +1457,7 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc = &local->parent_loc;
}
- local->err = op_errno;
+ local->err = op_errno ? op_errno : EINVAL;
gf_log (this->name, GF_LOG_WARNING,
"cannot hold inodelk on %s (gfid:%s) (%s)",
loc->path, uuid_utoa (loc->inode->gfid),
@@ -1353,22 +1476,22 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, local->next_lock_on,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
} else {
- marker_get_oldpath_contribution (frame, 0, this, 0, 0);
+ marker_get_oldpath_contribution (frame, 0, this, 0, 0, NULL);
}
return 0;
err:
- marker_rename_done (frame, NULL, this, 0, 0);
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
return 0;
}
int32_t
marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1382,11 +1505,11 @@ marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
if (priv->feature_enabled == 0)
goto rename_wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
- ALLOCATE_OR_GOTO (oplocal, marker_local_t, err);
+ oplocal = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, oplocal);
@@ -1437,18 +1560,18 @@ marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
this->name, lock_on,
- F_SETLKW, &lock);
+ F_SETLKW, &lock, NULL);
return 0;
rename_wind:
STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
err:
STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1457,7 +1580,7 @@ err:
int32_t
marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1472,7 +1595,7 @@ marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1492,7 +1615,8 @@ out:
}
int32_t
-marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1503,7 +1627,7 @@ marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1513,10 +1637,10 @@ marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
goto err;
wind:
STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1525,7 +1649,7 @@ err:
int32_t
marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1540,7 +1664,7 @@ marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1559,7 +1683,8 @@ out:
}
int32_t
-marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1570,7 +1695,7 @@ marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1580,10 +1705,10 @@ marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
goto err;
wind:
STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1593,7 +1718,7 @@ int32_t
marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_conf_t *priv = NULL;
marker_local_t *local = NULL;
@@ -1608,11 +1733,14 @@ marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA)
@@ -1628,7 +1756,7 @@ out:
int
marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1639,7 +1767,7 @@ marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1649,11 +1777,12 @@ marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
goto err;
wind:
STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, params);
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
return 0;
err:
STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
@@ -1662,7 +1791,7 @@ int32_t
marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1677,11 +1806,14 @@ marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
if (op_ret == -1 || local == NULL)
goto out;
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
priv = this->private;
if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) {
@@ -1698,7 +1830,7 @@ out:
int
marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *parms)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1709,7 +1841,7 @@ marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1721,11 +1853,216 @@ marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
wind:
STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, parms);
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
return 0;
err:
STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "fallocating a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
return 0;
}
@@ -1785,7 +2122,7 @@ call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
op_errno = EINVAL;
}
out:
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -1793,21 +2130,21 @@ out:
int32_t
marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+ "setxattr ", strerror (op_errno));
}
local = (marker_local_t *) frame->local;
frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1822,16 +2159,150 @@ out:
return 0;
}
+int
+remove_quota_keys (dict_t *dict, char *k, data_t *v, void *data)
+{
+ call_frame_t *frame = data;
+ marker_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ int ret = -1;
+
+ ret = syncop_removexattr (FIRST_CHILD (this), &local->loc, k, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove "
+ "extended attribute: %s", local->loc.path, k);
+ return -1;
+ }
+ return 0;
+}
+
+int
+quota_xattr_cleaner_cbk (int ret, call_frame_t *frame, void *args)
+{
+ dict_t *xdata = args;
+ int op_ret = -1;
+ int op_errno = 0;
+ marker_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ op_ret = (ret < 0)? -1: 0;
+ op_errno = -ret;
+
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ marker_local_unref (local);
+ return ret;
+}
+
+int
+quota_xattr_cleaner (void *args)
+{
+ struct synctask *task = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ marker_local_t *local = NULL;
+ dict_t *xdata = NULL;
+ int ret = -1;
+
+ task = synctask_get ();
+ if (!task)
+ goto out;
+
+ frame = task->frame;
+ this = frame->this;
+ local = frame->local;
+
+ ret = syncop_listxattr (FIRST_CHILD(this), &local->loc, &xdata);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = dict_foreach_fnmatch (xdata, "trusted.glusterfs.quota.*",
+ remove_quota_keys, frame);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+ ret = dict_foreach_fnmatch (xdata, PGFID_XATTR_KEY_PREFIX"*",
+ remove_quota_keys, frame);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (xdata)
+ dict_unref (xdata);
+
+ return ret;
+}
+
+int
+marker_do_xattr_cleanup (call_frame_t *frame, xlator_t *this, dict_t *xdata,
+ loc_t *loc)
+{
+ int ret = -1;
+ marker_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ if (!local)
+ goto out;
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ loc_copy (&local->loc, loc);
+ ret = synctask_new (this->ctx->env, quota_xattr_cleaner,
+ quota_xattr_cleaner_cbk, frame, xdata);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create synctask "
+ "for cleaning up quota extended attributes");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, xdata);
+ marker_local_unref (local);
+ }
+ return ret;
+}
+
+static inline gf_boolean_t
+marker_xattr_cleanup_cmd (dict_t *dict)
+{
+ return (dict_get (dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL);
+}
+
int32_t
marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ int op_errno = ENOMEM;
priv = this->private;
+ if (marker_xattr_cleanup_cmd (dict)) {
+ if (frame->root->uid != 0 || frame->root->gid != 0) {
+ op_errno = EPERM;
+ ret = -1;
+ goto err;
+ }
+
+ /* The following function does the cleanup and then unwinds the
+ * corresponding call*/
+ loc_path (loc, NULL);
+ marker_do_xattr_cleanup (frame, this, xdata, loc);
+ return 0;
+ }
+
if (priv->feature_enabled == 0)
goto wind;
@@ -1839,7 +2310,7 @@ marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
if (ret == 0)
return 0;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1849,10 +2320,10 @@ marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto err;
wind:
STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags);
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1860,7 +2331,7 @@ err:
int32_t
marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1874,7 +2345,7 @@ marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1891,7 +2362,7 @@ out:
int32_t
marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1906,7 +2377,7 @@ marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
if (ret == 0)
return 0;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1916,10 +2387,10 @@ marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
goto err;
wind:
STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags);
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1928,7 +2399,7 @@ err:
int32_t
marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -1943,7 +2414,7 @@ marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -1961,7 +2432,7 @@ out:
int32_t
marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -1972,7 +2443,7 @@ marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -1982,10 +2453,10 @@ marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto err;
wind:
STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1994,7 +2465,7 @@ err:
int32_t
marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -2012,7 +2483,7 @@ marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -2029,7 +2500,7 @@ out:
int32_t
marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -2040,7 +2511,7 @@ marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2050,10 +2521,10 @@ marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
wind:
STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2061,7 +2532,7 @@ err:
int32_t
marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
marker_local_t *local = NULL;
marker_conf_t *priv = NULL;
@@ -2075,7 +2546,7 @@ marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
if (op_ret == -1 || local == NULL)
goto out;
@@ -2092,7 +2563,7 @@ out:
int32_t
marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
marker_local_t *local = NULL;
@@ -2103,7 +2574,7 @@ marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2113,10 +2584,10 @@ marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
wind:
STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
return 0;
err:
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2145,6 +2616,15 @@ marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1 || local == NULL)
goto out;
+ /* copy the gfid from the stat structure instead of inode,
+ * since if the lookup is fresh lookup, then the inode
+ * would have not yet linked to the inode table which happens
+ * in protocol/server.
+ */
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+
priv = this->private;
if (priv->feature_enabled & GF_QUOTA) {
@@ -2170,7 +2650,7 @@ marker_lookup (call_frame_t *frame, xlator_t *this,
if (priv->feature_enabled == 0)
goto wind;
- ALLOCATE_OR_GOTO (local, marker_local_t, err);
+ local = mem_get0 (this->local_pool);
MARKER_INIT_LOCAL (frame, local);
@@ -2190,6 +2670,137 @@ err:
return 0;
}
+
+int
+marker_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ loc_t loc = {0, };
+ inode_t *parent = NULL;
+
+ if ((op_ret <= 0) || (entries == NULL)) {
+ goto out;
+ }
+
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (entry->inode == entry->inode->table->root) {
+ loc.path = gf_strdup ("/");
+ inode_unref (parent);
+ parent = NULL;
+ }
+
+ loc.inode = inode_ref (entry->inode);
+
+ if (parent != NULL) {
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+ }
+
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+
+ mq_xattr_state (this, &loc, entry->dict, entry->d_stat);
+
+ inode_unref (parent);
+ parent = inode_ref (entry->inode);
+ loc_wipe (&loc);
+ }
+
+ if (parent)
+ inode_unref (parent);
+
+out:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int
+marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ loc_t loc = {0, };
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) {
+ goto unwind;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ loc.inode = inode_ref (entry->inode);
+ loc.parent = inode_ref (local->loc.inode);
+
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+ uuid_copy (loc.pargfid, loc.parent->gfid);
+
+ mq_xattr_state (this, &loc, entry->dict, entry->d_stat);
+
+ loc_wipe (&loc);
+ }
+
+unwind:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int
+marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ marker_conf_t *priv = NULL;
+ loc_t loc = {0, };
+ marker_local_t *local = NULL;
+
+ priv = this->private;
+
+ if ((dict != NULL) && dict_get (dict, GET_ANCESTRY_DENTRY_KEY)) {
+ STACK_WIND (frame, marker_build_ancestry_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ } else {
+ if (priv->feature_enabled & GF_QUOTA) {
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ loc.parent = local->loc.inode = inode_ref (fd->inode);
+
+ if (dict == NULL)
+ dict = dict_new ();
+
+ mq_req_xattr (this, &loc, dict);
+ }
+
+ STACK_WIND (frame, marker_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ }
+
+ return 0;
+}
+
+
int32_t
mem_acct_init (xlator_t *this)
{
@@ -2290,14 +2901,11 @@ marker_xtime_priv_cleanup (xlator_t *this)
GF_VALIDATE_OR_GOTO (this->name, priv, out);
- if (priv->volume_uuid != NULL)
- GF_FREE (priv->volume_uuid);
+ GF_FREE (priv->volume_uuid);
- if (priv->timestamp_file != NULL)
- GF_FREE (priv->timestamp_file);
+ GF_FREE (priv->timestamp_file);
- if (priv->marker_xattr != NULL)
- GF_FREE (priv->marker_xattr);
+ GF_FREE (priv->marker_xattr);
out:
return;
}
@@ -2325,7 +2933,7 @@ out:
int32_t
reconfigure (xlator_t *this, dict_t *options)
{
- int32_t ret = -1;
+ int32_t ret = 0;
data_t *data = NULL;
gf_boolean_t flag = _gf_false;
marker_conf_t *priv = NULL;
@@ -2366,11 +2974,17 @@ reconfigure (xlator_t *this, dict_t *options)
"xtime updation will fail");
} else {
priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto out;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
}
}
}
out:
- return 0;
+ return ret;
}
@@ -2426,9 +3040,23 @@ init (xlator_t *this)
goto err;
priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto cont;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
}
}
+ cont:
+ this->local_pool = mem_pool_new (marker_local_t, 128);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
return 0;
err:
marker_priv_cleanup (this);
@@ -2481,7 +3109,11 @@ struct xlator_fops fops = {
.setattr = marker_setattr,
.fsetattr = marker_fsetattr,
.removexattr = marker_removexattr,
- .getxattr = marker_getxattr
+ .getxattr = marker_getxattr,
+ .readdirp = marker_readdirp,
+ .fallocate = marker_fallocate,
+ .discard = marker_discard,
+ .zerofill = marker_zerofill,
};
struct xlator_cbks cbks = {
@@ -2493,5 +3125,6 @@ struct volume_options options[] = {
{.key = {"timestamp-file"}},
{.key = {"quota"}},
{.key = {"xtime"}},
+ {.key = {"gsync-force-xtime"}},
{.key = {NULL}}
};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
index f5ed9df39..23d1580f0 100644
--- a/xlators/features/marker/src/marker.h
+++ b/xlators/features/marker/src/marker.h
@@ -1,21 +1,12 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_H
#define _MARKER_H
@@ -37,8 +28,9 @@
#define TIMESTAMP_FILE "timestamp-file"
enum {
- GF_QUOTA=1,
- GF_XTIME=2
+ GF_QUOTA = 1,
+ GF_XTIME = 2,
+ GF_XTIME_GSYNC_FORCE = 4,
};
/*initialize the local variable*/
@@ -103,9 +95,29 @@ struct marker_local{
call_stub_t *stub;
int64_t contribution;
struct marker_local *oplocal;
+
+ /* marker quota specific */
+ int64_t delta;
+ int64_t d_off;
+ int64_t sum;
+ int64_t size;
+ int32_t hl_count;
+ int32_t dentry_child_count;
+
+ fd_t *fd;
+ call_frame_t *frame;
+
+ quota_inode_ctx_t *ctx;
+ inode_contribution_t *contri;
+
+ int xflag;
+ dict_t *xdata;
+ gf_boolean_t skip_txn;
};
typedef struct marker_local marker_local_t;
+#define quota_local_t marker_local_t
+
struct marker_inode_ctx {
struct quota_inode_ctx *quota_ctx;
};
diff --git a/xlators/features/marker/utils/Makefile.am b/xlators/features/marker/utils/Makefile.am
deleted file mode 100644
index 556951d9f..000000000
--- a/xlators/features/marker/utils/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = syncdaemon src
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/src/Makefile.am b/xlators/features/marker/utils/src/Makefile.am
deleted file mode 100644
index 73c99cb76..000000000
--- a/xlators/features/marker/utils/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-gsyncddir = $(libexecdir)/glusterfs
-
-gsyncd_PROGRAMS = gsyncd
-
-gsyncd_SOURCES = gsyncd.c procdiggy.c
-
-gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-gsyncd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
-
-noinst_HEADERS = procdiggy.h
-
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src\
- -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
-
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/marker/utils/src/gsyncd.c b/xlators/features/marker/utils/src/gsyncd.c
deleted file mode 100644
index d554e562b..000000000
--- a/xlators/features/marker/utils/src/gsyncd.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "common-utils.h"
-#include "run.h"
-#include "procdiggy.h"
-
-#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
-#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
-#define GSYNCD_CONF "geo-replication/gsyncd.conf"
-#define GSYNCD_PY "gsyncd.py"
-#define RSYNC "rsync"
-
-int restricted = 0;
-
-static int
-duplexpand (void **buf, size_t tsiz, size_t *len)
-{
- size_t osiz = tsiz * *len;
-
- *buf = realloc (*buf, osiz << 1);
- if (!buf)
- return -1;
-
- memset ((char *)*buf + osiz, 0, osiz);
- *len <<= 1;
-
- return 0;
-}
-
-static int
-str2argv (char *str, char ***argv)
-{
- char *p = NULL;
- int argc = 0;
- size_t argv_len = 32;
- int ret = 0;
-
- assert (str);
- str = strdup (str);
- if (!str)
- return -1;
-
- *argv = calloc (argv_len, sizeof (**argv));
- if (!*argv)
- goto error;
-
- while ((p = strtok (str, " "))) {
- str = NULL;
-
- argc++;
- if (argc == argv_len) {
- ret = duplexpand ((void *)argv,
- sizeof (**argv),
- &argv_len);
- if (ret == -1)
- goto error;
- }
- (*argv)[argc - 1] = p;
- }
-
- return argc;
-
- error:
- fprintf (stderr, "out of memory\n");
- return -1;
-}
-
-static int
-invoke_gsyncd (int argc, char **argv)
-{
- char config_file[PATH_MAX] = {0,};
- size_t gluster_workdir_len = 0;
- runner_t runner = {0,};
- int i = 0;
- int j = 0;
- char *nargv[argc + 4];
-
- if (restricted) {
- /* in restricted mode we forcibly use the system-wide config */
- runinit (&runner);
- runner_add_args (&runner, SBIN_DIR"/gluster",
- "--log-file=-", "system::", "getwd",
- NULL);
- runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
- if (runner_start (&runner) == 0 &&
- fgets (config_file, PATH_MAX,
- runner_chio (&runner, STDOUT_FILENO)) != NULL &&
- config_file[strlen (config_file) - 1] == '\n' &&
- runner_end (&runner) == 0)
- gluster_workdir_len = strlen (config_file) - 1;
-
- if (gluster_workdir_len) {
- if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF) + 1 >
- PATH_MAX)
- goto error;
- config_file[gluster_workdir_len] = '/';
- strcat (config_file, GSYNCD_CONF);
- } else
- goto error;
-
- if (setenv ("_GSYNCD_RESTRICTED_", "1", 1) == -1)
- goto error;
- }
-
- if (chdir ("/") == -1)
- goto error;
-
- j = 0;
- nargv[j++] = PYTHON;
- nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY;
- for (i = 1; i < argc; i++)
- nargv[j++] = argv[i];
- if (config_file[0]) {
- nargv[j++] = "-c";
- nargv[j++] = config_file;
- }
- nargv[j++] = NULL;
-
- execvp (PYTHON, nargv);
-
- fprintf (stderr, "exec of "PYTHON" failed\n");
- return 127;
-
- error:
- fprintf (stderr, "gsyncd initializaion failed\n");
- return 1;
-}
-
-
-static int
-find_gsyncd (pid_t pid, pid_t ppid, char *name, void *data)
-{
- char buf[NAME_MAX * 2] = {0,};
- char *p = NULL;
- int zeros = 0;
- int ret = 0;
- int fd = -1;
- pid_t *pida = (pid_t *)data;
-
- if (ppid != pida[0])
- return 0;
-
- ret = gf_asprintf (&p, PROC"/%d/cmdline", pid);
- if (ret == -1) {
- fprintf (stderr, "out of memory\n");
- return -1;
- }
-
- fd = open (p, O_RDONLY);
- if (fd == -1)
- return 0;
- ret = read (fd, buf, sizeof (buf));
- close (fd);
- if (ret == -1)
- return 0;
- for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++)
- zeros += !*p;
-
- ret = 0;
- switch (zeros) {
- case 2:
- if ((strcmp (basename (buf), basename (PYTHON)) ||
- strcmp (basename (buf + strlen (buf) + 1), GSYNCD_PY)) == 0) {
- ret = 1;
- break;
- }
- /* fallthrough */
- case 1:
- if (strcmp (basename (buf), GSYNCD_PY) == 0)
- ret = 1;
- }
-
- if (ret == 1) {
- if (pida[1] != -1) {
- fprintf (stderr, GSYNCD_PY" sibling is not unique");
- return -1;
- }
- pida[1] = pid;
- }
-
- return 0;
-}
-
-static int
-invoke_rsync (int argc, char **argv)
-{
- int i = 0;
- char *p = NULL;
- pid_t pid = -1;
- pid_t ppid = -1;
- pid_t pida[] = {-1, -1};
- char *name = NULL;
- char buf[PATH_MAX + 1] = {0,};
- int ret = 0;
-
- assert (argv[argc] == NULL);
-
- if (argc < 2 || strcmp (argv[1], "--server") != 0)
- goto error;
-
- for (i = 2; i < argc && argv[i][0] == '-'; i++);
-
- if (!(i == argc - 2 && strcmp (argv[i], ".") == 0 && argv[i + 1][0] == '/')) {
- fprintf (stderr, "need an rsync invocation without protected args\n");
- goto error;
- }
-
- /* look up sshd we are spawned from */
- for (pid = getpid () ;; pid = ppid) {
- ppid = pidinfo (pid, &name);
- if (ppid < 0) {
- fprintf (stderr, "sshd ancestor not found\n");
- goto error;
- }
- if (strcmp (name, "sshd") == 0)
- break;
- }
- /* look up "ssh-sibling" gsyncd */
- pida[0] = pid;
- ret = prociter (find_gsyncd, pida);
- if (ret == -1 || pida[1] == -1) {
- fprintf (stderr, "gsyncd sibling not found\n");
- goto error;
- }
- /* check if rsync target matches gsyncd target */
- if (gf_asprintf (&p, PROC"/%d/cwd", pida[1]) == -1) {
- fprintf (stderr, "out of memory\n");
- goto error;
- }
- ret = readlink (p, buf, sizeof (buf));
- if (ret == -1 || ret == sizeof (buf))
- goto error;
- if (strcmp (argv[argc - 1], "/") == 0 /* root dir cannot be a target */ ||
- (strcmp (argv[argc - 1], p) /* match against gluster target */ &&
- strcmp (argv[argc - 1], buf) /* match against file target */) != 0) {
- fprintf (stderr, "rsync target does not match "GEOREP" session\n");
- goto error;
- }
-
- argv[0] = RSYNC;
-
- execvp (RSYNC, argv);
-
- fprintf (stderr, "exec of "RSYNC" failed\n");
- return 127;
-
- error:
- fprintf (stderr, "disallowed "RSYNC" invocation\n");
- return 1;
-}
-
-
-struct invocable {
- char *name;
- int (*invoker) (int argc, char **argv);
-};
-
-struct invocable invocables[] = {
- { "rsync", invoke_rsync },
- { "gsyncd", invoke_gsyncd },
- { NULL, NULL}
-};
-
-int
-main (int argc, char **argv)
-{
- char *evas = NULL;
- struct invocable *i = NULL;
- char *b = NULL;
- char *sargv = NULL;
-
- evas = getenv (_GLUSTERD_CALLED_);
- if (evas && strcmp (evas, "1") == 0)
- /* OK, we know glusterd called us, no need to look for further config
- * ... altough this conclusion should not inherit to our children
- */
- unsetenv (_GLUSTERD_CALLED_);
- else {
- /* we regard all gsyncd invocations unsafe
- * that do not come from glusterd and
- * therefore restrict it
- */
- restricted = 1;
-
- if (!getenv (_GSYNCD_DISPATCHED_)) {
- evas = getenv ("SSH_ORIGINAL_COMMAND");
- if (evas)
- sargv = evas;
- else {
- evas = getenv ("SHELL");
- if (evas && strcmp (basename (evas), "gsyncd") == 0 &&
- argc == 3 && strcmp (argv[1], "-c") == 0)
- sargv = argv[2];
- }
- }
-
- }
-
- if (!(sargv && restricted))
- return invoke_gsyncd (argc, argv);
-
- argc = str2argv (sargv, &argv);
- if (argc == -1 || setenv (_GSYNCD_DISPATCHED_, "1", 1) == -1) {
- fprintf (stderr, "internal error\n");
- return 1;
- }
-
- b = basename (argv[0]);
- for (i = invocables; i->name; i++) {
- if (strcmp (b, i->name) == 0)
- return i->invoker (argc, argv);
- }
-
- fprintf (stderr, "invoking %s in restricted SSH session is not allowed\n",
- b);
-
- return 1;
-}
diff --git a/xlators/features/marker/utils/src/procdiggy.c b/xlators/features/marker/utils/src/procdiggy.c
deleted file mode 100644
index fc0f97999..000000000
--- a/xlators/features/marker/utils/src/procdiggy.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "common-utils.h"
-#include "procdiggy.h"
-
-pid_t
-pidinfo (pid_t pid, char **name)
-{
- char buf[NAME_MAX * 2] = {0,};
- FILE *f = NULL;
- char *p = NULL;
- int ret = 0;
-
- ret = gf_asprintf (&p, PROC"/%d/status", pid);
- if (ret == -1)
- goto oom;
-
- f = fopen (p, "r");
- if (!f)
- return -1;
-
- if (name)
- *name = NULL;
- for (;;) {
- memset (buf, 0, sizeof (buf));
- if (fgets (buf, sizeof (buf), f) == NULL ||
- buf[strlen (buf) - 1] != '\n') {
- pid = -1;
- goto out;
- }
- buf[strlen (buf) -1] = '\0';
-
- if (name && !*name) {
- p = strtail (buf, "Name:");
- if (p) {
- while (isspace (*++p));
- *name = gf_strdup (p);
- if (!*name)
- goto oom;
- continue;
- }
- }
-
- p = strtail (buf, "PPid:");
- if (p)
- break;
- }
-
- while (isspace (*++p));
- ret = gf_string2int (p, &pid);
- if (ret == -1)
- pid = -1;
-
- out:
- fclose (f);
- return pid;
-
- oom:
- fclose (f);
- fprintf (stderr, "out of memory\n");
- return -2;
-}
-
-int
-prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data),
- void *data)
-{
- char *name = NULL;
- DIR *d = NULL;
- struct dirent *de = NULL;
- pid_t pid = -1;
- pid_t ppid = -1;
- int ret = 0;
-
- d = opendir (PROC);
- while (errno = 0, de = readdir (d)) {
- if (gf_string2int (de->d_name, &pid) != -1 && pid >= 0) {
- ppid = pidinfo (pid, &name);
- switch (ppid) {
- case -1: continue;
- case -2: return -1;
- }
- ret = proch (pid, ppid, name, data);
- if (ret)
- return ret;
- }
- }
- if (errno) {
- fprintf (stderr, "failed to traverse "PROC" (%s)\n",
- strerror (errno));
- return -1;
- }
-
- return 0;
-}
diff --git a/xlators/features/marker/utils/src/procdiggy.h b/xlators/features/marker/utils/src/procdiggy.h
deleted file mode 100644
index f4586de6c..000000000
--- a/xlators/features/marker/utils/src/procdiggy.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#define PROC "/proc"
-
-pid_t pidinfo (pid_t pid, char **name);
-
-int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data),
- void *data);
-
diff --git a/xlators/features/marker/utils/syncdaemon/Makefile.am b/xlators/features/marker/utils/syncdaemon/Makefile.am
deleted file mode 100644
index cc7cee102..000000000
--- a/xlators/features/marker/utils/syncdaemon/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
-
-syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
- $(top_builddir)/contrib/ipaddr-py/ipaddr.py
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/xlators/features/marker/utils/syncdaemon/README.md
deleted file mode 100644
index d45006932..000000000
--- a/xlators/features/marker/utils/syncdaemon/README.md
+++ /dev/null
@@ -1,81 +0,0 @@
-gsycnd, the Gluster Syncdaemon
-==============================
-
-REQUIREMENTS
-------------
-
-_gsyncd_ is a program which can operate either in _master_ or in _slave_ mode.
-Requirements are categorized according to this.
-
-* supported OS is GNU/Linux
-* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
-* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
-* rsync (both)
-* glusterfs with marker support (master); glusterfs (optional on slave)
-* FUSE; for supported versions consult glusterfs
-
-INSTALLATION
-------------
-
-As of now, the supported way of operation is running from the source directory.
-
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
-
-CONFIGURATION
--------------
-
-gsyncd tunables are a subset of the long command-line options; for listing them,
-type
-
- gsyncd.py --help
-
-and see the long options up to "--config-file". (The leading double dash should be omitted;
-interim underscores and dashes are interchangeable.) The set of options bear some resemblance
-to those of glusterfs and rsync.
-
-The config file format matches the following syntax:
-
- <option1>: <value1>
- <option2>: <value2>
- # comment
-
-By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_
-in the source tree.
-
-USAGE
------
-
-gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
-Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors
-for it with gysncd:
-
-1. _/data/mirror_
-2. local gluster volume _yow_
-3. _/data/far_mirror_ at example.com
-4. gluster volume _moz_ at example.com
-
-The respective gsyncd invocations are (demoing some syntax sugaring):
-
-1.
-
- gsyncd.py gluster://localhost:pop file:///data/mirror
-
- or short form
-
- gsyncd.py :pop /data/mirror
-
-2. `gsyncd :pop :yow`
-3.
-
- gsyncd.py :pop ssh://example.com:/data/far_mirror
-
- or short form
-
- gsyncd.py :pop example.com:/data/far_mirror
-
-4. `gsyncd.py :pop example.com::moz`
-
-gsyncd has to be available on both sides; it's location on the remote side has to be specified
-via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
-used for setting options on the remote side, although the suggested mode of operation is to
-set parameters like log file / pid file in the configuration file.)
diff --git a/xlators/features/marker/utils/syncdaemon/__codecheck.py b/xlators/features/marker/utils/syncdaemon/__codecheck.py
deleted file mode 100644
index e3386afba..000000000
--- a/xlators/features/marker/utils/syncdaemon/__codecheck.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import os.path
-import sys
-import tempfile
-import shutil
-
-ipd = tempfile.mkdtemp(prefix = 'codecheck-aux')
-
-try:
- # add a fake ipaddr module, we don't want to
- # deal with the real one (just test our code)
- f = open(os.path.join(ipd, 'ipaddr.py'), 'w')
- f.write("""
-class IPAddress(object):
- pass
-class IPNetwork(list):
- pass
-""")
- f.close()
- sys.path.append(ipd)
-
- fl = os.listdir(os.path.dirname(sys.argv[0]) or '.')
- fl.sort()
- for f in fl:
- if f[-3:] != '.py' or f[0] == '_':
- continue
- m = f[:-3]
- sys.stdout.write('importing %s ...' % m)
- __import__(m)
- print(' OK.')
-
- def sys_argv_set(a):
- sys.argv = sys.argv[:1] + a
-
- gsyncd = sys.modules['gsyncd']
- for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]:
- print('>>> invoking program with args: %s' % ' '.join(a))
- pid = os.fork()
- if not pid:
- sys_argv_set(a)
- gsyncd.main()
- _, r = os.waitpid(pid, 0)
- if r:
- raise RuntimeError('invocation failed')
-finally:
- shutil.rmtree(ipd)
diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/xlators/features/marker/utils/syncdaemon/__init__.py
deleted file mode 100644
index e69de29bb..000000000
--- a/xlators/features/marker/utils/syncdaemon/__init__.py
+++ /dev/null
diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/xlators/features/marker/utils/syncdaemon/configinterface.py
deleted file mode 100644
index e55bec519..000000000
--- a/xlators/features/marker/utils/syncdaemon/configinterface.py
+++ /dev/null
@@ -1,224 +0,0 @@
-try:
- import ConfigParser
-except ImportError:
- # py 3
- import configparser as ConfigParser
-import re
-from string import Template
-
-from syncdutils import escape, unescape, norm, update_file, GsyncdError
-
-SECT_ORD = '__section_order__'
-SECT_META = '__meta__'
-config_version = 2.0
-
-re_type = type(re.compile(''))
-
-
-class MultiDict(object):
- """a virtual dict-like class which functions as the union of underlying dicts"""
-
- def __init__(self, *dd):
- self.dicts = dd
-
- def __getitem__(self, key):
- val = None
- for d in self.dicts:
- if d.get(key):
- val = d[key]
- if not val:
- raise KeyError(key)
- return val
-
-
-class GConffile(object):
- """A high-level interface to ConfigParser which flattens the two-tiered
- config layout by implenting automatic section dispatch based on initial
- parameters.
-
- Also ensure section ordering in terms of their time of addition -- a compat
- hack for Python < 2.7.
- """
-
- def _normconfig(self):
- """normalize config keys by s/-/_/g"""
- for n, s in self.config._sections.items():
- if n.find('__') == 0:
- continue
- s2 = type(s)()
- for k, v in s.items():
- if k.find('__') != 0:
- k = norm(k)
- s2[k] = v
- self.config._sections[n] = s2
-
- def __init__(self, path, peers, *dd):
- """
- - .path: location of config file
- - .config: underlying ConfigParser instance
- - .peers: on behalf of whom we flatten .config
- (master, or master-slave url pair)
- - .auxdicts: template subtituents
- """
- self.peers = peers
- self.path = path
- self.auxdicts = dd
- self.config = ConfigParser.RawConfigParser()
- self.config.read(path)
- self._normconfig()
-
- def section(self, rx=False):
- """get the section name of the section representing .peers in .config"""
- peers = self.peers
- if not peers:
- peers = ['.', '.']
- rx = True
- if rx:
- st = 'peersrx'
- else:
- st = 'peers'
- return ' '.join([st] + [escape(u) for u in peers])
-
- @staticmethod
- def parse_section(section):
- """retrieve peers sequence encoded by section name
- (as urls or regexen, depending on section type)
- """
- sl = section.split()
- st = sl.pop(0)
- sl = [unescape(u) for u in sl]
- if st == 'peersrx':
- sl = [re.compile(u) for u in sl]
- return sl
-
- def ord_sections(self):
- """Return an ordered list of sections.
-
- Ordering happens based on the auxiliary
- SECT_ORD section storing indices for each
- section added through the config API.
-
- To not to go corrupt in case of manually
- written config files, we take care to append
- also those sections which are not registered
- in SECT_ORD.
-
- Needed for python 2.{4,5,6} where ConfigParser
- cannot yet order sections/options internally.
- """
- so = {}
- if self.config.has_section(SECT_ORD):
- so = self.config._sections[SECT_ORD]
- so2 = {}
- for k, v in so.items():
- if k != '__name__':
- so2[k] = int(v)
- tv = 0
- if so2:
- tv = max(so2.values()) + 1
- ss = [s for s in self.config.sections() if s.find('__') != 0]
- for s in ss:
- if s in so.keys():
- continue
- so2[s] = tv
- tv += 1
- def scmp(x, y):
- return cmp(*(so2[s] for s in (x, y)))
- ss.sort(scmp)
- return ss
-
- def update_to(self, dct, allow_unresolved=False):
- """update @dct from key/values of ours.
-
- key/values are collected from .config by filtering the regexp sections
- according to match, and from .section. The values are treated as templates,
- which are substituted from .auxdicts and (in case of regexp sections)
- match groups.
- """
- if not self.peers:
- raise GsyncdError('no peers given, cannot select matching options')
- def update_from_sect(sect, mud):
- for k, v in self.config._sections[sect].items():
- if k == '__name__':
- continue
- if allow_unresolved:
- dct[k] = Template(v).safe_substitute(mud)
- else:
- dct[k] = Template(v).substitute(mud)
- for sect in self.ord_sections():
- sp = self.parse_section(sect)
- if isinstance(sp[0], re_type) and len(sp) == len(self.peers):
- match = True
- mad = {}
- for i in range(len(sp)):
- m = sp[i].search(self.peers[i])
- if not m:
- match = False
- break
- for j in range(len(m.groups())):
- mad['match%d_%d' % (i+1, j+1)] = m.groups()[j]
- if match:
- update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts))
- if self.config.has_section(self.section()):
- update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
-
- def get(self, opt=None):
- """print the matching key/value pairs from .config,
- or if @opt given, the value for @opt (according to the
- logic described in .update_to)
- """
- d = {}
- self.update_to(d, allow_unresolved = True)
- if opt:
- opt = norm(opt)
- v = d.get(opt)
- if v:
- print(v)
- else:
- for k, v in d.iteritems():
- if k == '__name__':
- continue
- print("%s: %s" % (k, v))
-
- def write(self, trfn, opt, *a, **kw):
- """update on-disk config transactionally
-
- @trfn is the transaction function
- """
- def mergeconf(f):
- self.config = ConfigParser.RawConfigParser()
- self.config.readfp(f)
- self._normconfig()
- if not self.config.has_section(SECT_META):
- self.config.add_section(SECT_META)
- self.config.set(SECT_META, 'version', config_version)
- return trfn(norm(opt), *a, **kw)
- def updateconf(f):
- self.config.write(f)
- update_file(self.path, updateconf, mergeconf)
-
- def _set(self, opt, val, rx=False):
- """set @opt to @val in .section"""
- sect = self.section(rx)
- if not self.config.has_section(sect):
- self.config.add_section(sect)
- # regarding SECT_ORD, cf. ord_sections
- if not self.config.has_section(SECT_ORD):
- self.config.add_section(SECT_ORD)
- self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD]))
- self.config.set(sect, opt, val)
- return True
-
- def set(self, opt, *a, **kw):
- """perform ._set transactionally"""
- self.write(self._set, opt, *a, **kw)
-
- def _delete(self, opt, rx=False):
- """delete @opt from .section"""
- sect = self.section(rx)
- if self.config.has_section(sect):
- return self.config.remove_option(sect, opt)
-
- def delete(self, opt, *a, **kw):
- """perform ._delete transactionally"""
- self.write(self._delete, opt, *a, **kw)
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py
deleted file mode 100644
index 146c72a18..000000000
--- a/xlators/features/marker/utils/syncdaemon/gconf.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import os
-
-class GConf(object):
- """singleton class to store globals
- shared between gsyncd modules"""
-
- ssh_ctl_dir = None
- ssh_ctl_args = None
- cpid = None
- pid_file_owned = False
- log_exit = False
- permanent_handles = []
- log_metadata = {}
-
- @classmethod
- def setup_ssh_ctl(cls, ctld):
- cls.ssh_ctl_dir = ctld
- cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")]
-
-gconf = GConf()
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py
deleted file mode 100644
index f145cbc6d..000000000
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ /dev/null
@@ -1,369 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import os.path
-import sys
-import time
-import logging
-import signal
-import optparse
-import fcntl
-import fnmatch
-from optparse import OptionParser, SUPPRESS_HELP
-from logging import Logger
-from errno import EEXIST, ENOENT
-
-from ipaddr import IPAddress, IPNetwork
-
-from gconf import gconf
-from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
-from syncdutils import GsyncdError, select
-from configinterface import GConffile
-import resource
-from monitor import monitor
-
-class GLogger(Logger):
- """Logger customizations for gsyncd.
-
- It implements a log format similar to that of glusterfs.
- """
-
- def makeRecord(self, name, level, *a):
- rv = Logger.makeRecord(self, name, level, *a)
- rv.nsecs = (rv.created - int(rv.created)) * 1000000
- fr = sys._getframe(4)
- callee = fr.f_locals.get('self')
- if callee:
- ctx = str(type(callee)).split("'")[1].split('.')[-1]
- else:
- ctx = '<top>'
- if not hasattr(rv, 'funcName'):
- rv.funcName = fr.f_code.co_name
- rv.lvlnam = logging.getLevelName(level)[0]
- rv.ctx = ctx
- return rv
-
- @classmethod
- def setup(cls, **kw):
- lbl = kw.get('label', "")
- if lbl:
- lbl = '(' + lbl + ')'
- lprm = {'datefmt': "%Y-%m-%d %H:%M:%S",
- 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"}
- lprm.update(kw)
- lvl = kw.get('level', logging.INFO)
- lprm['level'] = lvl
- logging.root = cls("root", lvl)
- logging.setLoggerClass(cls)
- logging.getLogger().handlers = []
- logging.basicConfig(**lprm)
-
-def startup(**kw):
- """set up logging, pidfile grabbing, daemonization"""
- if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
- if not grabpidfile():
- sys.stderr.write("pidfile is taken, exiting.\n")
- sys.exit(2)
- gconf.pid_file_owned = True
-
- if kw.get('go_daemon') == 'should':
- x, y = os.pipe()
- gconf.cpid = os.fork()
- if gconf.cpid:
- os.close(x)
- sys.exit()
- os.close(y)
- os.setsid()
- dn = os.open(os.devnull, os.O_RDWR)
- for f in (sys.stdin, sys.stdout, sys.stderr):
- os.dup2(dn, f.fileno())
- if getattr(gconf, 'pid_file', None):
- if not grabpidfile(gconf.pid_file + '.tmp'):
- raise GsyncdError("cannot grab temporary pidfile")
- os.rename(gconf.pid_file + '.tmp', gconf.pid_file)
- # wait for parent to terminate
- # so we can start up with
- # no messing from the dirty
- # ol' bustard
- select((x,), (), ())
- os.close(x)
-
- lkw = {}
- if gconf.log_level:
- lkw['level'] = gconf.log_level
- if kw.get('log_file'):
- if kw['log_file'] in ('-', '/dev/stderr'):
- lkw['stream'] = sys.stderr
- elif kw['log_file'] == '/dev/stdout':
- lkw['stream'] = sys.stdout
- else:
- lkw['filename'] = kw['log_file']
-
- GLogger.setup(label=kw.get('label'), **lkw)
-
- lkw.update({'saved_label': kw.get('label')})
- gconf.log_metadata = lkw
- gconf.log_exit = True
-
-def main():
- """main routine, signal/exception handling boilerplates"""
- signal.signal(signal.SIGTERM, lambda *a: finalize(*a, **{'exval': 1}))
- GLogger.setup()
- excont = FreeObject(exval = 0)
- try:
- try:
- main_i()
- except:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
-
-def main_i():
- """internal main routine
-
- parse command line, decide what action will be taken;
- we can either:
- - query/manipulate configuration
- - format gsyncd urls using gsyncd's url parsing engine
- - start service in following modes, in given stages:
- - monitor: startup(), monitor()
- - master: startup(), connect_remote(), connect(), service_loop()
- - slave: startup(), connect(), service_loop()
- """
- rconf = {'go_daemon': 'should'}
-
- def store_abs(opt, optstr, val, parser):
- if val and val != '-':
- val = os.path.abspath(val)
- setattr(parser.values, opt.dest, val)
- def store_local(opt, optstr, val, parser):
- rconf[opt.dest] = val
- def store_local_curry(val):
- return lambda o, oo, vx, p: store_local(o, oo, val, p)
- def store_local_obj(op, dmake):
- return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p)
-
- op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1")
- op.add_option('--gluster-command-dir', metavar='DIR', default='')
- op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs)
- op.add_option('--gluster-log-level', metavar='LVL')
- op.add_option('--gluster-params', metavar='PRMS', default='')
- op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-')
- op.add_option('--mountbroker', metavar='LABEL')
- op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs)
- op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs)
- op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
- op.add_option('--ignore-deletes', default=False, action='store_true')
- op.add_option('-L', '--log-level', metavar='LVL')
- op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0]))
- op.add_option('--volume-id', metavar='UUID')
- op.add_option('--session-owner', metavar='ID')
- op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
- op.add_option('--rsync-command', metavar='CMD', default='rsync')
- op.add_option('--rsync-extra', metavar='ARGS', default='-S', help=SUPPRESS_HELP)
- op.add_option('--timeout', metavar='SEC', type=int, default=120)
- op.add_option('--sync-jobs', metavar='N', type=int, default=3)
- op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
- op.add_option('--allow-network', metavar='IPS', default='')
-
- op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
- # duh. need to specify dest or value will be mapped to None :S
- op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True))
- op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local)
- op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True))
- op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont'))
- op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a),
- setattr(a[-1].values, 'log_file', '-'),
- setattr(a[-1].values, 'log_level', 'DEBUG'))),
-
- for a in ('check', 'get'):
- op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj(a, lambda vx: {'opt': vx}))
- op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None}))
- for m in ('', '-rx', '-glob'):
- # call this code 'Pythonic' eh?
- # have to define a one-shot local function to be able to inject (a value depending on the)
- # iteration variable into the inner lambda
- def conf_mod_opt_regex_variant(rx):
- op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback',
- callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx}))
- op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx}))
- conf_mod_opt_regex_variant(m and m[1:] or False)
-
- op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal'))
- op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon'))
- op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc'))
-
- tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ]
- remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file' ]
- rq_remote_tunables = { 'listen': True }
-
- # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults
- # -- for this to work out we need to tell apart defaults from explicitly set
- # options... so churn out the defaults here and call the parser with virgin
- # values container.
- defaults = op.get_default_values()
- opts, args = op.parse_args(values=optparse.Values())
- confdata = rconf.get('config')
- if not (len(args) == 2 or \
- (len(args) == 1 and rconf.get('listen')) or \
- (len(args) <= 2 and confdata) or \
- rconf.get('url_print')):
- sys.stderr.write("error: incorrect number of arguments\n\n")
- sys.stderr.write(op.get_usage() + "\n")
- sys.exit(1)
-
- restricted = os.getenv('_GSYNCD_RESTRICTED_')
-
- if restricted:
- allopts = {}
- allopts.update(opts.__dict__)
- allopts.update(rconf)
- bannedtuns = set(allopts.keys()) - set(remote_tunables)
- if bannedtuns:
- raise GsyncdError('following tunables cannot be set with restricted SSH invocaton: ' + \
- ', '.join(bannedtuns))
- for k, v in rq_remote_tunables.items():
- if not k in allopts or allopts[k] != v:
- raise GsyncdError('tunable %s is not set to value %s required for restricted SSH invocaton' % \
- (k, v))
-
- confrx = getattr(confdata, 'rx', None)
- if confrx:
- # peers are regexen, don't try to parse them
- if confrx == 'glob':
- args = [ '\A' + fnmatch.translate(a) for a in args ]
- canon_peers = args
- namedict = {}
- else:
- rscs = [resource.parse_url(u) for u in args]
- dc = rconf.get('url_print')
- if dc:
- for r in rscs:
- print(r.get_url(**{'normal': {},
- 'canon': {'canonical': True},
- 'canon_esc': {'canonical': True, 'escaped': True}}[dc]))
- return
- local = remote = None
- if rscs:
- local = rscs[0]
- if len(rscs) > 1:
- remote = rscs[1]
- if not local.can_connect_to(remote):
- raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
- pa = ([], [], [])
- urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True})
- for x in rscs:
- for i in range(len(pa)):
- pa[i].append(x.get_url(**urlprms[i]))
- peers, canon_peers, canon_esc_peers = pa
- # creating the namedict, a dict representing various ways of referring to / repreenting
- # peers to be fillable in config templates
- mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
- if remote:
- rmap = { local: ('local', 'master'), remote: ('remote', 'slave') }
- else:
- rmap = { local: ('local', 'slave') }
- namedict = {}
- for i in range(len(rscs)):
- x = rscs[i]
- for name in rmap[x]:
- for j in range(3):
- namedict[mods[j](name)] = pa[j][i]
- if x.scheme == 'gluster':
- namedict[name + 'vol'] = x.volume
- if not 'config_file' in rconf:
- rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf")
- gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict)
-
- if confdata:
- opt_ok = norm(confdata.opt) in tunables + [None]
- if confdata.op == 'check':
- if opt_ok:
- sys.exit(0)
- else:
- sys.exit(1)
- elif not opt_ok:
- raise GsyncdError("not a valid option: " + confdata.opt)
- if confdata.op == 'get':
- gcnf.get(confdata.opt)
- elif confdata.op == 'set':
- gcnf.set(confdata.opt, confdata.val, confdata.rx)
- elif confdata.op == 'del':
- gcnf.delete(confdata.opt, confdata.rx)
- return
-
- gconf.__dict__.update(defaults.__dict__)
- gcnf.update_to(gconf.__dict__)
- gconf.__dict__.update(opts.__dict__)
- gconf.configinterface = gcnf
-
- if restricted and gconf.allow_network:
- ssh_conn = os.getenv('SSH_CONNECTION')
- if not ssh_conn:
- #legacy env var
- ssh_conn = os.getenv('SSH_CLIENT')
- if ssh_conn:
- allowed_networks = [ IPNetwork(a) for a in gconf.allow_network.split(',') ]
- client_ip = IPAddress(ssh_conn.split()[0])
- allowed = False
- for nw in allowed_networks:
- if client_ip in nw:
- allowed = True
- break
- if not allowed:
- raise GsyncdError("client IP address is not allowed")
-
- ffd = rconf.get('feedback_fd')
- if ffd:
- fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
-
- #normalize loglevel
- lvl0 = gconf.log_level
- if isinstance(lvl0, str):
- lvl1 = lvl0.upper()
- lvl2 = logging.getLevelName(lvl1)
- # I have _never_ _ever_ seen such an utterly braindead
- # error condition
- if lvl2 == "Level " + lvl1:
- raise GsyncdError('cannot recognize log level "%s"' % lvl0)
- gconf.log_level = lvl2
-
- go_daemon = rconf['go_daemon']
- be_monitor = rconf.get('monitor')
-
- if not be_monitor and isinstance(remote, resource.SSH) and \
- go_daemon == 'should':
- go_daemon = 'postconn'
- log_file = None
- else:
- log_file = gconf.log_file
- if be_monitor:
- label = 'monitor'
- elif remote:
- #master
- label = ''
- else:
- label = 'slave'
- startup(go_daemon=go_daemon, log_file=log_file, label=label)
-
- if be_monitor:
- return monitor()
-
- logging.info("syncing: %s" % " -> ".join(peers))
- resource.Popen.init_errhandler()
- if remote:
- go_daemon = remote.connect_remote(go_daemon=go_daemon)
- if go_daemon:
- startup(go_daemon=go_daemon, log_file=gconf.log_file)
- # complete remote connection in child
- remote.connect_remote(go_daemon='done')
- local.connect()
- if ffd:
- os.close(ffd)
- local.service_loop(*[r for r in [remote] if r])
-
-
-if __name__ == "__main__":
- main()
diff --git a/xlators/features/marker/utils/syncdaemon/libcxattr.py b/xlators/features/marker/utils/syncdaemon/libcxattr.py
deleted file mode 100644
index f0a9d2292..000000000
--- a/xlators/features/marker/utils/syncdaemon/libcxattr.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import os
-from ctypes import *
-from ctypes.util import find_library
-
-class Xattr(object):
- """singleton that wraps the extended attribues system
- interface for python using ctypes
-
- Just implement it to the degree we need it, in particular
- - we need just the l*xattr variants, ie. we never want symlinks to be
- followed
- - don't need size discovery for getxattr, as we always know the exact
- sizes we expect
- """
-
- libc = CDLL(find_library("libc"))
-
- @classmethod
- def geterrno(cls):
- return c_int.in_dll(cls.libc, 'errno').value
-
- @classmethod
- def raise_oserr(cls):
- errn = cls.geterrno()
- raise OSError(errn, os.strerror(errn))
-
- @classmethod
- def _query_xattr(cls, path, siz, syscall, *a):
- if siz:
- buf = create_string_buffer('\0' * siz)
- else:
- buf = None
- ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
- if ret == -1:
- cls.raise_oserr()
- if siz:
- return buf.raw[:ret]
- else:
- return ret
-
- @classmethod
- def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr( path, siz, 'lgetxattr', attr)
-
- @classmethod
- def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path, siz, 'llistxattr')
- if isinstance(ret, str):
- ret = ret.split('\0')
- return ret
-
- @classmethod
- def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path, attr)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def llistxattr_buf(cls, path):
- """listxattr variant with size discovery"""
- size = cls.llistxattr(path)
- if size == -1:
- cls.raise_oserr()
- if size == 0:
- return []
- return cls.llistxattr(path, size)
diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py
deleted file mode 100644
index 9e54dc4fa..000000000
--- a/xlators/features/marker/utils/syncdaemon/master.py
+++ /dev/null
@@ -1,518 +0,0 @@
-import os
-import sys
-import time
-import stat
-import signal
-import logging
-import errno
-from errno import ENOENT, ENODATA
-from threading import currentThread, Condition, Lock
-
-from gconf import gconf
-from syncdutils import FreeObject, Thread, GsyncdError, boolify
-
-URXTIME = (-1, 0)
-
-class GMaster(object):
- """class impementling master role"""
-
- KFGN = 0
- KNAT = 1
-
- def get_sys_volinfo(self):
- """query volume marks on fs root
-
- err out on multiple foreign masters
- """
- fgn_vis, nat_vi = self.master.server.foreign_volume_infos(), \
- self.master.server.native_volume_info()
- fgn_vi = None
- if fgn_vis:
- if len(fgn_vis) > 1:
- raise GsyncdError("cannot work with multiple foreign masters")
- fgn_vi = fgn_vis[0]
- return fgn_vi, nat_vi
-
- @property
- def uuid(self):
- if self.volinfo:
- return self.volinfo['uuid']
-
- @property
- def volmark(self):
- if self.volinfo:
- return self.volinfo['volume_mark']
-
- @property
- def inter_master(self):
- """decide if we are an intermediate master
- in a cascading setup
- """
- return self.volinfo_state[self.KFGN] and True or False
-
- def xtime(self, path, *a, **opts):
- """get amended xtime
-
- as of amending, we can create missing xtime, or
- determine a valid value if what we get is expired
- (as of the volume mark expiry); way of amendig
- depends on @opts and on subject of query (master
- or slave).
- """
- if a:
- rsc = a[0]
- else:
- rsc = self.master
- if not 'create' in opts:
- opts['create'] = (rsc == self.master and not self.inter_master)
- if not 'default_xtime' in opts:
- if rsc == self.master and self.inter_master:
- opts['default_xtime'] = ENODATA
- else:
- opts['default_xtime'] = URXTIME
- xt = rsc.server.xtime(path, self.uuid)
- if isinstance(xt, int) and xt != ENODATA:
- return xt
- invalid_xtime = (xt == ENODATA or xt < self.volmark)
- if invalid_xtime and opts['create']:
- t = time.time()
- sec = int(t)
- nsec = int((t - sec) * 1000000)
- xt = (sec, nsec)
- rsc.server.set_xtime(path, self.uuid, xt)
- if invalid_xtime:
- xt = opts['default_xtime']
- return xt
-
- def __init__(self, master, slave):
- self.master = master
- self.slave = slave
- self.jobtab = {}
- self.syncer = Syncer(slave)
- # crawls vs. turns:
- # - self.crawls is simply the number of crawl() invocations on root
- # - one turn is a maximal consecutive sequence of crawls so that each
- # crawl in it detects a change to be synced
- # - self.turns is the number of turns since start
- # - self.total_turns is a limit so that if self.turns reaches it, then
- # we exit (for diagnostic purposes)
- # so, eg., if the master fs changes unceasingly, self.turns will remain 0.
- self.crawls = 0
- self.turns = 0
- self.total_turns = int(gconf.turns)
- self.lastreport = {'crawls': 0, 'turns': 0}
- self.start = None
- self.change_seen = None
- # the authoritative (foreign, native) volinfo pair
- # which lets us deduce what to do when we refetch
- # the volinfos from system
- uuid_preset = getattr(gconf, 'volume_id', None)
- self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None)
- # the actual volinfo we make use of
- self.volinfo = None
- self.terminate = False
-
- def crawl_loop(self):
- """start the keep-alive thread and iterate .crawl"""
- timo = int(gconf.timeout or 0)
- if timo > 0:
- def keep_alive():
- while True:
- gap = timo * 0.5
- # first grab a reference as self.volinfo
- # can be changed in main thread
- vi = self.volinfo
- if vi:
- # then have a private copy which we can mod
- vi = vi.copy()
- vi['timeout'] = int(time.time()) + timo
- else:
- # send keep-alives more frequently to
- # avoid a delay in announcing our volume info
- # to slave if it becomes established in the
- # meantime
- gap = min(10, gap)
- self.slave.server.keep_alive(vi)
- time.sleep(gap)
- t = Thread(target=keep_alive)
- t.start()
- self.lastreport['time'] = time.time()
- while not self.terminate:
- self.crawl()
-
- def add_job(self, path, label, job, *a, **kw):
- """insert @job function to job table at @path with @label"""
- if self.jobtab.get(path) == None:
- self.jobtab[path] = []
- self.jobtab[path].append((label, a, lambda : job(*a, **kw)))
-
- def add_failjob(self, path, label):
- """invoke .add_job with a job that does nothing just fails"""
- logging.debug('salvaged: ' + label)
- self.add_job(path, label, lambda: False)
-
- def wait(self, path, *args):
- """perform jobs registered for @path
-
- Reset jobtab entry for @path,
- determine success as the conjuction of
- success of all the jobs. In case of
- success, call .sendmark on @path
- """
- jobs = self.jobtab.pop(path, [])
- succeed = True
- for j in jobs:
- ret = j[-1]()
- if not ret:
- succeed = False
- if succeed:
- self.sendmark(path, *args)
- return succeed
-
- def sendmark(self, path, mark, adct=None):
- """update slave side xtime for @path to master side xtime
-
- also can send a setattr payload (see Server.setattr).
- """
- if adct:
- self.slave.server.setattr(path, adct)
- self.slave.server.set_xtime(path, self.uuid, mark)
-
- @staticmethod
- def volinfo_state_machine(volinfo_state, volinfo_sys):
- """compute new volinfo_state from old one and incoming
- as of current system state, also indicating if there was a
- change regarding which volume mark is the authoritative one
-
- @volinfo_state, @volinfo_sys are pairs of volume mark dicts
- (foreign, native).
-
- Note this method is marked as static, ie. the computation is
- pure, without reliance on any excess implicit state. State
- transitions which are deemed as ambiguous or banned will raise
- an exception.
-
- """
- # store the value below "boxed" to emulate proper closures
- # (variables of the enclosing scope are available inner functions
- # provided they are no reassigned; mutation is OK).
- param = FreeObject(relax_mismatch = False, state_change = None, index=-1)
- def select_vi(vi0, vi):
- param.index += 1
- if vi and (not vi0 or vi0['uuid'] == vi['uuid']):
- if not vi0 and not param.relax_mismatch:
- param.state_change = param.index
- # valid new value found; for the rest, we are graceful about
- # uuid mismatch
- param.relax_mismatch = True
- return vi
- if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch:
- # uuid mismatch for master candidate, bail out
- raise GsyncdError("aborting on uuid change from %s to %s" % \
- (vi0['uuid'], vi['uuid']))
- # fall back to old
- return vi0
- newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys))
- srep = lambda vi: vi and vi['uuid'][0:8]
- logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \
- tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate))
- return newstate, param.state_change
-
- def crawl(self, path='.', xtl=None):
- """crawling...
-
- Standing around
- All the right people
- Crawling
- Tennis on Tuesday
- The ladder is long
- It is your nature
- You've gotta suntan
- Football on Sunday
- Society boy
-
- Recursively walk the master side tree and check if updates are
- needed due to xtime differences. One invocation of crawl checks
- children of @path and do a recursive enter only on
- those directory children where there is an update needed.
-
- Way of updates depend on file type:
- - for symlinks, sync them directy and synchronously
- - for regular children, register jobs for @path (cf. .add_job) to start
- and wait on their rsync
- - for directory children, register a job for @path which waits (.wait)
- on jobs for the given child
- (other kind of filesystem nodes are not considered)
-
- Those slave side children which do not exist on master are simply
- purged (see Server.purge).
-
- Behavior is fault tolerant, synchronization is adaptive: if some action fails,
- just go on relentlessly, adding a fail job (see .add_failjob) which will prevent
- the .sendmark on @path, so when the next crawl will arrive to @path it will not
- see it as up-to-date and will try to sync it again. While this semantics can be
- supported by funky design principles (http://c2.com/cgi/wiki?LazinessImpatienceHubris),
- the ultimate reason which excludes other possibilities is simply transience: we cannot
- assert that the file systems (master / slave) underneath do not change and actions
- taken upon some condition will not lose their context by the time they are performed.
- """
- if path == '.':
- if self.start:
- self.crawls += 1
- logging.debug("... crawl #%d done, took %.6f seconds" % \
- (self.crawls, time.time() - self.start))
- time.sleep(1)
- self.start = time.time()
- should_display_info = self.start - self.lastreport['time'] >= 60
- if should_display_info:
- logging.info("completed %d crawls, %d turns",
- self.crawls - self.lastreport['crawls'],
- self.turns - self.lastreport['turns'])
- self.lastreport.update(crawls = self.crawls,
- turns = self.turns,
- time = self.start)
- volinfo_sys = self.get_sys_volinfo()
- self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state,
- volinfo_sys)
- if self.inter_master:
- self.volinfo = volinfo_sys[self.KFGN]
- else:
- self.volinfo = volinfo_sys[self.KNAT]
- if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master):
- logging.info('new master is %s', self.uuid)
- if self.volinfo:
- logging.info("%s master with volume id %s ..." % \
- (self.inter_master and "intermediate" or "primary",
- self.uuid))
- if state_change == self.KFGN:
- gconf.configinterface.set('volume_id', self.uuid)
- if self.volinfo:
- if self.volinfo['retval']:
- raise GsyncdError ("master is corrupt")
- else:
- if should_display_info or self.crawls == 0:
- if self.inter_master:
- logging.info("waiting for being synced from %s ..." % \
- self.volinfo_state[self.KFGN]['uuid'])
- else:
- logging.info("waiting for volume info ...")
- return
- logging.debug("entering " + path)
- if not xtl:
- xtl = self.xtime(path)
- if isinstance(xtl, int):
- self.add_failjob(path, 'no-local-node')
- return
- xtr0 = self.xtime(path, self.slave)
- if isinstance(xtr0, int):
- if xtr0 != ENOENT:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- except OSError:
- self.add_failjob(path, 'no-remote-node')
- return
- xtr = URXTIME
- else:
- xtr = xtr0
- if xtr > xtl:
- raise GsyncdError("timestamp corruption for " + path)
- if xtl == xtr:
- if path == '.' and self.change_seen:
- self.turns += 1
- self.change_seen = False
- if self.total_turns:
- logging.info("finished turn #%s/%s" % \
- (self.turns, self.total_turns))
- if self.turns == self.total_turns:
- logging.info("reached turn limit")
- self.terminate = True
- return
- if path == '.':
- self.change_seen = True
- try:
- dem = self.master.server.entries(path)
- except OSError:
- self.add_failjob(path, 'local-entries-fail')
- return
- try:
- des = self.slave.server.entries(path)
- except OSError:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- des = self.slave.server.entries(path)
- except OSError:
- self.add_failjob(path, 'remote-entries-fail')
- return
- dd = set(des) - set(dem)
- if dd and not boolify(gconf.ignore_deletes):
- self.slave.server.purge(path, dd)
- chld = []
- for e in dem:
- e = os.path.join(path, e)
- xte = self.xtime(e)
- if isinstance(xte, int):
- logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte]))
- elif xte > xtr:
- chld.append((e, xte))
- def indulgently(e, fnc, blame=None):
- if not blame:
- blame = path
- try:
- return fnc(e)
- except (IOError, OSError):
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- logging.warn("salvaged ENOENT for" + e)
- self.add_failjob(blame, 'by-indulgently')
- return False
- else:
- raise
- for e, xte in chld:
- st = indulgently(e, lambda e: os.lstat(e))
- if st == False:
- continue
- mo = st.st_mode
- adct = {'own': (st.st_uid, st.st_gid)}
- if stat.S_ISLNK(mo):
- if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False:
- continue
- self.sendmark(e, xte, adct)
- elif stat.S_ISREG(mo):
- logging.debug("syncing %s ..." % e)
- pb = self.syncer.add(e)
- def regjob(e, xte, pb):
- if pb.wait():
- logging.debug("synced " + e)
- self.sendmark(e, xte)
- return True
- else:
- logging.error("failed to sync " + e)
- self.add_job(path, 'reg', regjob, e, xte, pb)
- elif stat.S_ISDIR(mo):
- adct['mode'] = mo
- if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct),
- self.crawl(e, xte),
- True)[-1], blame=e) == False:
- continue
- else:
- # ignore fifos, sockets and special files
- pass
- if path == '.':
- self.wait(path, xtl)
-
-class BoxClosedErr(Exception):
- pass
-
-class PostBox(list):
- """synchronized collection for storing things thought of as "requests" """
-
- def __init__(self, *a):
- list.__init__(self, *a)
- # too bad Python stdlib does not have read/write locks...
- # it would suffivce to grab the lock in .append as reader, in .close as writer
- self.lever = Condition()
- self.open = True
- self.done = False
-
- def wait(self):
- """wait on requests to be processed"""
- self.lever.acquire()
- if not self.done:
- self.lever.wait()
- self.lever.release()
- return self.result
-
- def wakeup(self, data):
- """wake up requestors with the result"""
- self.result = data
- self.lever.acquire()
- self.done = True
- self.lever.notifyAll()
- self.lever.release()
-
- def append(self, e):
- """post a request"""
- self.lever.acquire()
- if not self.open:
- raise BoxClosedErr
- list.append(self, e)
- self.lever.release()
-
- def close(self):
- """prohibit the posting of further requests"""
- self.lever.acquire()
- self.open = False
- self.lever.release()
-
-class Syncer(object):
- """a staged queue to relay rsync requests to rsync workers
-
- By "staged queue" its meant that when a consumer comes to the
- queue, it takes _all_ entries, leaving the queue empty.
- (I don't know if there is an official term for this pattern.)
-
- The queue uses a PostBox to accumulate incoming items.
- When a consumer (rsync worker) comes, a new PostBox is
- set up and the old one is passed on to the consumer.
-
- Instead of the simplistic scheme of having one big lock
- which synchronizes both the addition of new items and
- PostBox exchanges, use a separate lock to arbitrate consumers,
- and rely on PostBox's synchronization mechanisms take
- care about additions.
-
- There is a corner case racy situation, producers vs. consumers,
- which is not handled by this scheme: namely, when the PostBox
- exchange occurs in between being passed to the producer for posting
- and the post placement. But that's what Postbox.close is for:
- such a posting will find the PostBox closed, in which case
- the producer can re-try posting against the actual PostBox of
- the queue.
-
- To aid accumlation of items in the PostBoxen before grabbed
- by an rsync worker, the worker goes to sleep a bit after
- each completed syncjob.
- """
-
- def __init__(self, slave):
- """spawn worker threads"""
- self.slave = slave
- self.lock = Lock()
- self.pb = PostBox()
- for i in range(int(gconf.sync_jobs)):
- t = Thread(target=self.syncjob)
- t.start()
-
- def syncjob(self):
- """the life of a worker"""
- while True:
- pb = None
- while True:
- self.lock.acquire()
- if self.pb:
- pb, self.pb = self.pb, PostBox()
- self.lock.release()
- if pb:
- break
- time.sleep(0.5)
- pb.close()
- po = self.slave.rsync(pb)
- if po.returncode == 0:
- ret = True
- elif po.returncode in (23, 24):
- # partial transfer (cf. rsync(1)), that's normal
- ret = False
- else:
- po.errfail()
- pb.wakeup(ret)
-
- def add(self, e):
- while True:
- pb = self.pb
- try:
- pb.append(e)
- return pb
- except BoxClosedErr:
- pass
diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py
deleted file mode 100644
index 9536f3e26..000000000
--- a/xlators/features/marker/utils/syncdaemon/monitor.py
+++ /dev/null
@@ -1,123 +0,0 @@
-import os
-import sys
-import time
-import signal
-import logging
-from gconf import gconf
-from syncdutils import update_file, select, waitpid
-
-class Monitor(object):
- """class which spawns and manages gsyncd workers"""
-
- def __init__(self):
- self.state = None
-
- def set_state(self, state):
- """set the state that can be used by external agents
- like glusterd for status reporting"""
- if state == self.state:
- return
- self.state = state
- logging.info('new state: %s' % state)
- if getattr(gconf, 'state_file', None):
- update_file(gconf.state_file, lambda f: f.write(state + '\n'))
-
- def monitor(self):
- """the monitor loop
-
- Basic logic is a blantantly simple blunt heuristics:
- if spawned client survives 60 secs, it's considered OK.
- This servers us pretty well as it's not vulneralbe to
- any kind of irregular behavior of the child...
-
- ... well, except for one: if children is hung up on
- waiting for some event, it can survive aeons, still
- will be defunct. So we tweak the above logic to
- expect the worker to send us a signal within 60 secs
- (in the form of closing its end of a pipe). The worker
- does this when it's done with the setup stage
- ready to enter the service loop (note it's the setup
- stage which is vulnerable to hangs -- the full
- blown worker blows up on EPIPE if the net goes down,
- due to the keep-alive thread)
- """
- def sigcont_handler(*a):
- """
- Re-init logging and send group kill signal
- """
- md = gconf.log_metadata
- logging.shutdown()
- lcls = logging.getLoggerClass()
- lcls.setup(label=md.get('saved_label'), **md)
- pid = os.getpid()
- os.kill(-pid, signal.SIGUSR1)
- signal.signal(signal.SIGUSR1, lambda *a: ())
- signal.signal(signal.SIGCONT, sigcont_handler)
-
- argv = sys.argv[:]
- for o in ('-N', '--no-daemon', '--monitor'):
- while o in argv:
- argv.remove(o)
- argv.extend(('-N', '-p', ''))
- argv.insert(0, os.path.basename(sys.executable))
-
- self.set_state('starting...')
- ret = 0
- def nwait(p, o=0):
- p2, r = waitpid(p, o)
- if not p2:
- return
- return r
- def exit_signalled(s):
- """ child teminated due to receipt of SIGUSR1 """
- return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
- def exit_status(s):
- if os.WIFEXITED(s):
- return os.WEXITSTATUS(s)
- return 1
- conn_timeout = 60
- while ret in (0, 1):
- logging.info('-' * conn_timeout)
- logging.info('starting gsyncd worker')
- pr, pw = os.pipe()
- cpid = os.fork()
- if cpid == 0:
- os.close(pr)
- os.execv(sys.executable, argv + ['--feedback-fd', str(pw)])
- os.close(pw)
- t0 = time.time()
- so = select((pr,), (), (), conn_timeout)[0]
- os.close(pr)
- if so:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died before establishing connection")
- else:
- logging.debug("worker seems to be connected (?? racy check)")
- while time.time() < t0 + conn_timeout:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died in startup phase")
- break
- time.sleep(1)
- else:
- logging.debug("worker not confirmed in %d sec, aborting it" % \
- conn_timeout)
- os.kill(cpid, signal.SIGKILL)
- ret = nwait(cpid)
- if ret == None:
- self.set_state('OK')
- ret = nwait(cpid)
- if exit_signalled(ret):
- ret = 0
- else:
- ret = exit_status(ret)
- if ret in (0,1):
- self.set_state('faulty')
- time.sleep(10)
- self.set_state('inconsistent')
- return ret
-
-def monitor():
- """oh yeah, actually Monitor is used as singleton, too"""
- return Monitor().monitor()
diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/xlators/features/marker/utils/syncdaemon/repce.py
deleted file mode 100644
index 755fb61df..000000000
--- a/xlators/features/marker/utils/syncdaemon/repce.py
+++ /dev/null
@@ -1,225 +0,0 @@
-import os
-import sys
-import time
-import logging
-from threading import Condition
-try:
- import thread
-except ImportError:
- # py 3
- import _thread as thread
-try:
- from Queue import Queue
-except ImportError:
- # py 3
- from queue import Queue
-try:
- import cPickle as pickle
-except ImportError:
- # py 3
- import pickle
-
-from syncdutils import Thread, select
-
-pickle_proto = -1
-repce_version = 1.0
-
-def ioparse(i, o):
- if isinstance(i, int):
- i = os.fdopen(i)
- # rely on duck typing for recognizing
- # streams as that works uniformly
- # in py2 and py3
- if hasattr(o, 'fileno'):
- o = o.fileno()
- return (i, o)
-
-def send(out, *args):
- """pickle args and write out wholly in one syscall
-
- ie. not use the ability of pickle to dump directly to
- a stream, as that would potentially mess up messages
- by interleaving them
- """
- os.write(out, pickle.dumps(args, pickle_proto))
-
-def recv(inf):
- """load an object from input stream"""
- return pickle.load(inf)
-
-
-class RepceServer(object):
- """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
-
- ... also our homebrewed RPC backend where the transport layer is
- reduced to a pair of filehandles.
-
- This is the server component.
- """
-
- def __init__(self, obj, i, o, wnum=6):
- """register a backend object .obj to which incoming messages
- are dispatched, also incoming/outcoming streams
- """
- self.obj = obj
- self.inf, self.out = ioparse(i, o)
- self.wnum = wnum
- self.q = Queue()
-
- def service_loop(self):
- """fire up worker threads, get messages and dispatch among them"""
- for i in range(self.wnum):
- t = Thread(target=self.worker)
- t.start()
- try:
- while True:
- self.q.put(recv(self.inf))
- except EOFError:
- logging.info("terminating on reaching EOF.")
-
- def worker(self):
- """life of a worker
-
- Get message, extract its id, method name and arguments
- (kwargs not supported), call method on .obj.
- Send back message id + return value.
- If method call throws an exception, rescue it, and send
- back the exception as result (with flag marking it as
- exception).
- """
- while True:
- in_data = self.q.get(True)
- rid = in_data[0]
- rmeth = in_data[1]
- exc = False
- if rmeth == '__repce_version__':
- res = repce_version
- else:
- try:
- res = getattr(self.obj, rmeth)(*in_data[2:])
- except:
- res = sys.exc_info()[1]
- exc = True
- logging.exception("call failed: ")
- send(self.out, rid, exc, res)
-
-
-class RepceJob(object):
- """class representing message status we can use
- for waiting on reply"""
-
- def __init__(self, cbk):
- """
- - .rid: (process-wise) unique id
- - .cbk: what we do upon receiving reply
- """
- self.rid = (os.getpid(), thread.get_ident(), time.time())
- self.cbk = cbk
- self.lever = Condition()
- self.done = False
-
- def __repr__(self):
- return ':'.join([str(x) for x in self.rid])
-
- def wait(self):
- self.lever.acquire()
- if not self.done:
- self.lever.wait()
- self.lever.release()
- return self.result
-
- def wakeup(self, data):
- self.result = data
- self.lever.acquire()
- self.done = True
- self.lever.notify()
- self.lever.release()
-
-
-class RepceClient(object):
- """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
-
- ... also our homebrewed RPC backend where the transport layer is
- reduced to a pair of filehandles.
-
- This is the client component.
- """
-
- def __init__(self, i, o):
- self.inf, self.out = ioparse(i, o)
- self.jtab = {}
- t = Thread(target = self.listen)
- t.start()
-
- def listen(self):
- while True:
- select((self.inf,), (), ())
- rid, exc, res = recv(self.inf)
- rjob = self.jtab.pop(rid)
- if rjob.cbk:
- rjob.cbk(rjob, [exc, res])
-
- def push(self, meth, *args, **kw):
- """wrap arguments in a RepceJob, send them to server
- and return the RepceJob
-
- @cbk to pass on RepceJob can be given as kwarg.
- """
- cbk = kw.get('cbk')
- if not cbk:
- def cbk(rj, res):
- if res[0]:
- raise res[1]
- rjob = RepceJob(cbk)
- self.jtab[rjob.rid] = rjob
- logging.debug("call %s %s%s ..." % (repr(rjob), meth, repr(args)))
- send(self.out, rjob.rid, meth, *args)
- return rjob
-
- def __call__(self, meth, *args):
- """RePCe client is callabe, calling it implements a synchronous remote call
-
- We do a .push with a cbk which does a wakeup upon receiving anwser, then wait
- on the RepceJob.
- """
- rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
- exc, res = rjob.wait()
- if exc:
- logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__)))
- raise res
- logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
- return res
-
- class mprx(object):
- """method proxy, standard trick to implement rubyesque method_missing
- in Python
-
- A class is a closure factory, you know what I mean, or go read some SICP.
- """
- def __init__(self, ins, meth):
- self.ins = ins
- self.meth = meth
-
- def __call__(self, *a):
- return self.ins(self.meth, *a)
-
- def __getattr__(self, meth):
- """this implements transparent method dispatch to remote object,
- so that you don't need to call the RepceClient instance like
-
- rclient('how_old_are_you_if_born_in', 1979)
-
- but you can make it into an ordinary method call like
-
- rclient.how_old_are_you_if_born_in(1979)
- """
- return self.mprx(self, meth)
-
- def __version__(self):
- """used in handshake to verify compatibility"""
- d = {'proto': self('__repce_version__')}
- try:
- d['object'] = self('version')
- except AttributeError:
- pass
- return d
diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py
deleted file mode 100644
index 3454c3823..000000000
--- a/xlators/features/marker/utils/syncdaemon/resource.py
+++ /dev/null
@@ -1,837 +0,0 @@
-import re
-import os
-import sys
-import stat
-import time
-import errno
-import struct
-import socket
-import logging
-import tempfile
-import threading
-import subprocess
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR
-
-from gconf import gconf
-import repce
-from repce import RepceServer, RepceClient
-from master import GMaster
-import syncdutils
-from syncdutils import GsyncdError, select
-
-UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
-HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
-UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+")
-
-def sup(x, *a, **kw):
- """a rubyesque "super" for python ;)
-
- invoke caller method in parent class with given args.
- """
- return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw)
-
-def desugar(ustr):
- """transform sugared url strings to standard <scheme>://<urlbody> form
-
- parsing logic enforces the constraint that sugared forms should contatin
- a ':' or a '/', which ensures that sugared urls do not conflict with
- gluster volume names.
- """
- m = re.match('([^:]*):(.*)', ustr)
- if m:
- if not m.groups()[0]:
- return "gluster://localhost" + ustr
- elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]):
- return "ssh://" + ustr
- else:
- return "gluster://" + ustr
- else:
- if ustr[0] != '/':
- raise GsyncdError("cannot resolve sugared url '%s'" % ustr)
- ap = os.path.normpath(ustr)
- if ap.startswith('//'):
- ap = ap[1:]
- return "file://" + ap
-
-def gethostbyname(hnam):
- """gethostbyname wrapper"""
- try:
- return socket.gethostbyname(hnam)
- except socket.gaierror:
- ex = sys.exc_info()[1]
- raise GsyncdError("failed to resolve %s: %s" % \
- (hnam, ex.strerror))
-
-def parse_url(ustr):
- """instantiate an url object by scheme-to-class dispatch
-
- The url classes taken into consideration are the ones in
- this module whose names are full-caps.
- """
- m = UrlRX.match(ustr)
- if not m:
- ustr = desugar(ustr)
- m = UrlRX.match(ustr)
- if not m:
- raise GsyncdError("malformed url")
- sch, path = m.groups()
- this = sys.modules[__name__]
- if not hasattr(this, sch.upper()):
- raise GsyncdError("unknown url scheme " + sch)
- return getattr(this, sch.upper())(path)
-
-
-class _MetaXattr(object):
- """singleton class, a lazy wrapper around the
- libcxattr module
-
- libcxattr (a heavy import due to ctypes) is
- loaded only when when the single
- instance is tried to be used.
-
- This reduces runtime for those invocations
- which do not need filesystem manipulation
- (eg. for config, url parsing)
- """
-
- def __getattr__(self, meth):
- from libcxattr import Xattr as LXattr
- xmeth = [ m for m in dir(LXattr) if m[0] != '_' ]
- if not meth in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LXattr, m))
- return getattr(self, meth)
-
-Xattr = _MetaXattr()
-
-
-class Popen(subprocess.Popen):
- """customized subclass of subprocess.Popen with a ring
- buffer for children error ouput"""
-
- @classmethod
- def init_errhandler(cls):
- """start the thread which handles children's error output"""
- cls.errstore = {}
- def tailer():
- while True:
- for po in select([po.stderr for po in cls.errstore], [], []):
- po.lock.acquire()
- try:
- la = cls.errstore.get(po)
- if la == None:
- continue
- l = os.read(po.stderr.fileno(), 1024)
- tots = len(l)
- for lx in la:
- tots += len(lx)
- while tots > 1<<20 and la:
- tots -= len(la.pop(0))
- finally:
- po.lock.release()
- t = syncdutils.Thread(target = tailer)
- t.start()
- cls.errhandler = t
-
- def __init__(self, args, *a, **kw):
- """customizations for subprocess.Popen instantiation
-
- - 'close_fds' is taken to be the default
- - if child's stderr is chosen to be managed,
- register it with the error handler thread
- """
- self.args = args
- if 'close_fds' not in kw:
- kw['close_fds'] = True
- self.lock = threading.Lock()
- try:
- sup(self, args, *a, **kw)
- except:
- ex = sys.exc_info()[1]
- if not isinstance(ex, OSError):
- raise
- raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \
- (args[0], errno.errorcode[ex.errno], os.strerror(ex.errno)))
- if kw['stderr'] == subprocess.PIPE:
- assert(getattr(self, 'errhandler', None))
- self.errstore[self] = []
-
- def errfail(self):
- """fail nicely if child did not terminate with success"""
- filling = None
- if self.elines:
- filling = ", saying:"
- logging.error("""command "%s" returned with %d%s""" % \
- (" ".join(self.args), self.returncode, filling))
- for l in self.elines:
- for ll in l.rstrip().split("\n"):
- logging.error(self.args[0] + "> " + ll.rstrip())
- syncdutils.finalize(exval = 1)
-
- def terminate_geterr(self, fail_on_err = True):
- """kill child, finalize stderr harvesting (unregister
- from errhandler, set up .elines), fail on error if
- asked for
- """
- self.lock.acquire()
- try:
- elines = self.errstore.pop(self)
- finally:
- self.lock.release()
- if self.poll() == None:
- self.terminate()
- if sp.poll() == None:
- time.sleep(0.1)
- sp.kill()
- while True:
- b = os.read(self.stderr.fileno(), 1024)
- if b:
- elines.append(b)
- else:
- break
- self.stderr.close()
- self.elines = elines
- if fail_on_err and self.returncode != 0:
- self.errfail()
-
-
-class Server(object):
- """singleton implemening those filesystem access primitives
- which are needed for geo-replication functionality
-
- (Singleton in the sense it's a class which has only static
- and classmethods and is used directly, without instantiation.)
- """
-
- GX_NSPACE = "trusted.glusterfs"
- NTV_FMTSTR = "!" + "B"*19 + "II"
- FRGN_XTRA_FMT = "I"
- FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
-
- def _pathguard(f):
- """decorator method that checks
- the path argument of the decorated
- functions to make sure it does not
- point out of the managed tree
- """
-
- fc = getattr(f, 'func_code', None)
- if not fc:
- # python 3
- fc = f.__code__
- pi = list(fc.co_varnames).index('path')
- def ff(*a):
- path = a[pi]
- ps = path.split('/')
- if path[0] == '/' or '..' in ps:
- raise ValueError('unsafe path')
- return f(*a)
- return ff
-
- @staticmethod
- @_pathguard
- def entries(path):
- """directory entries in an array"""
- # prevent symlinks being followed
- if not stat.S_ISDIR(os.lstat(path).st_mode):
- raise OSError(ENOTDIR, os.strerror(ENOTDIR))
- return os.listdir(path)
-
- @classmethod
- @_pathguard
- def purge(cls, path, entries=None):
- """force-delete subtrees
-
- If @entries is not specified, delete
- the whole subtree under @path (including
- @path).
-
- Otherwise, @entries should be a
- a sequence of children of @path, and
- the effect is identical with a joint
- @entries-less purge on them, ie.
-
- for e in entries:
- cls.purge(os.path.join(path, e))
- """
- me_also = entries == None
- if not entries:
- try:
- # if it's a symlink, prevent
- # following it
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EISDIR:
- entries = os.listdir(path)
- else:
- raise
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOTDIR, ENOENT, ELOOP):
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- return
- raise
- else:
- raise
- for e in entries:
- cls.purge(os.path.join(path, e))
- if me_also:
- os.rmdir(path)
-
- @classmethod
- @_pathguard
- def _create(cls, path, ctor):
- """path creation backend routine"""
- try:
- ctor(path)
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EEXIST:
- cls.purge(path)
- return ctor(path)
- raise
-
- @classmethod
- @_pathguard
- def mkdir(cls, path):
- cls._create(path, os.mkdir)
-
- @classmethod
- @_pathguard
- def symlink(cls, lnk, path):
- cls._create(path, lambda p: os.symlink(lnk, p))
-
- @classmethod
- @_pathguard
- def xtime(cls, path, uuid):
- """query xtime extended attribute
-
- Return xtime of @path for @uuid as a pair of integers.
- "Normal" errors due to non-existent @path or extended attribute
- are tolerated and errno is returned in such a case.
- """
-
- try:
- return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOENT, ENODATA, ENOTDIR):
- return ex.errno
- else:
- raise
-
- @classmethod
- @_pathguard
- def set_xtime(cls, path, uuid, mark):
- """set @mark as xtime for @uuid on @path"""
- Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
-
- @staticmethod
- @_pathguard
- def setattr(path, adct):
- """set file attributes
-
- @adct is a dict, where 'own', 'mode' and 'times'
- keys are looked for and values used to perform
- chown, chmod or utimes on @path.
- """
- own = adct.get('own')
- if own:
- os.lchown(path, *own)
- mode = adct.get('mode')
- if mode:
- os.chmod(path, stat.S_IMODE(mode))
- times = adct.get('times')
- if times:
- os.utime(path, times)
-
- @staticmethod
- def pid():
- return os.getpid()
-
- last_keep_alive = 0
- @classmethod
- def keep_alive(cls, dct):
- """process keepalive messages.
-
- Return keep-alive counter (number of received keep-alive
- messages).
-
- Now the "keep-alive" message can also have a payload which is
- used to set a foreign volume-mark on the underlying file system.
- """
- if dct:
- key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
- val = struct.pack(cls.FRGN_FMTSTR,
- *(dct['version'] +
- tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) +
- (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],)))
- Xattr.lsetxattr('.', key, val)
- cls.last_keep_alive += 1
- return cls.last_keep_alive
-
- @staticmethod
- def version():
- """version used in handshake"""
- return 1.0
-
-
-class SlaveLocal(object):
- """mix-in class to implement some factes of a slave server
-
- ("mix-in" is sort of like "abstract class", ie. it's not
- instantiated just included in the ancesty DAG. I use "mix-in"
- to indicate that it's not used as an abstract base class,
- rather just taken in to implement additional functionality
- on the basis of the assumed availability of certain interfaces.)
- """
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return not remote
-
- def service_loop(self):
- """start a RePCe server serving self's server
-
- stop servicing if a timeout is configured and got no
- keep-alime in that inteval
- """
- repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info("slave listening")
- if gconf.timeout and int(gconf.timeout) > 0:
- while True:
- lp = self.server.last_keep_alive
- time.sleep(int(gconf.timeout))
- if lp == self.server.last_keep_alive:
- logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout))
- break
- else:
- select((), (), ())
-
-class SlaveRemote(object):
- """mix-in class to implement an interface to a remote slave"""
-
- def connect_remote(self, rargs=[], **opts):
- """connects to a remote slave
-
- Invoke an auxiliary utility (slave gsyncd, possibly wrapped)
- which sets up the connection and set up a RePCe client to
- communicate throuh its stdio.
- """
- slave = opts.get('slave', self.url)
- so = getattr(gconf, 'session_owner', None)
- if so:
- so_args = ['--session-owner', so]
- else:
- so_args = []
- po = Popen(rargs + gconf.remote_gsyncd.split() + so_args + \
- ['-N', '--listen', '--timeout', str(gconf.timeout), slave],
- stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- gconf.transport = po
- return self.start_fd_client(po.stdout, po.stdin, **opts)
-
- def start_fd_client(self, i, o, **opts):
- """set up RePCe client, handshake with server
-
- It's cut out as a separate method to let
- subclasses hook into client startup
- """
- self.server = RepceClient(i, o)
- rv = self.server.__version__()
- exrv = {'proto': repce.repce_version, 'object': Server.version()}
- da0 = (rv, exrv)
- da1 = ({}, {})
- for i in range(2):
- for k, v in da0[i].iteritems():
- da1[i][k] = int(v)
- if da1[0] != da1[1]:
- raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv))
-
- def rsync(self, files, *args):
- """invoke rsync"""
- if not files:
- raise GsyncdError("no files to sync")
- logging.debug("files: " + ", ".join(files))
- argv = gconf.rsync_command.split() + gconf.rsync_extra.split() + ['-aR'] + files + list(args)
- po = Popen(argv, stderr=subprocess.PIPE)
- po.wait()
- po.terminate_geterr(fail_on_err = False)
- return po
-
-
-class AbstractUrl(object):
- """abstract base class for url scheme classes"""
-
- def __init__(self, path, pattern):
- m = re.search(pattern, path)
- if not m:
- raise GsyncdError("malformed path")
- self.path = path
- return m.groups()
-
- @property
- def scheme(self):
- return type(self).__name__.lower()
-
- def canonical_path(self):
- return self.path
-
- def get_url(self, canonical=False, escaped=False):
- """format self's url in various styles"""
- if canonical:
- pa = self.canonical_path()
- else:
- pa = self.path
- u = "://".join((self.scheme, pa))
- if escaped:
- u = syncdutils.escape(u)
- return u
-
- @property
- def url(self):
- return self.get_url()
-
-
- ### Concrete resource classes ###
-
-
-class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
- """scheme class for file:// urls
-
- can be used to represent a file slave server
- on slave side, or interface to a remote file
- file server on master side
- """
-
- class FILEServer(Server):
- """included server flavor"""
- pass
-
- server = FILEServer
-
- def __init__(self, path):
- sup(self, path, '^/')
-
- def connect(self):
- """inhibit the resource beyond"""
- os.chdir(self.path)
-
- def rsync(self, files):
- return sup(self, files, self.path)
-
-
-class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
- """scheme class for gluster:// urls
-
- can be used to represent a gluster slave server
- on slave side, or interface to a remote gluster
- slave on master side, or to represent master
- (slave-ish features come from the mixins, master
- functionality is outsourced to GMaster from master)
- """
-
- class GLUSTERServer(Server):
- "server enhancements for a glusterfs backend"""
-
- @classmethod
- def _attr_unpack_dict(cls, xattr, extra_fields = ''):
- """generic volume mark fetching/parsing backed"""
- fmt_string = cls.NTV_FMTSTR + extra_fields
- buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
- vm = struct.unpack(fmt_string, buf)
- m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]]))
- uuid = '-'.join(m.groups())
- volinfo = { 'version': vm[0:2],
- 'uuid' : uuid,
- 'retval' : vm[18],
- 'volume_mark': vm[19:21],
- }
- if extra_fields:
- return volinfo, vm[-len(extra_fields):]
- else:
- return volinfo
-
- @classmethod
- def foreign_volume_infos(cls):
- """return list of valid (not expired) foreign volume marks"""
- dict_list = []
- xattr_list = Xattr.llistxattr_buf('.')
- for ele in xattr_list:
- if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
- d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
- now = int(time.time())
- if x[0] > now:
- logging.debug("volinfo[%s] expires: %d (%d sec later)" % \
- (d['uuid'], x[0], x[0] - now))
- dict_list.append(d)
- else:
- try:
- Xattr.lremovexattr('.', ele)
- except OSError:
- pass
- return dict_list
-
- @classmethod
- def native_volume_info(cls):
- """get the native volume mark of the underlying gluster volume"""
- try:
- return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark']))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno != ENODATA:
- raise
-
- server = GLUSTERServer
-
- def __init__(self, path):
- self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern)
-
- def canonical_path(self):
- return ':'.join([gethostbyname(self.host), self.volume])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return True
-
- class Mounter(object):
- """Abstract base class for mounter backends"""
-
- def __init__(self, params):
- self.params = params
-
- @classmethod
- def get_glusterprog(cls):
- return os.path.join(gconf.gluster_command_dir, cls.glusterprog)
-
- def umount_l(self, d):
- """perform lazy umount"""
- po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE)
- po.wait()
- return po
-
- @classmethod
- def make_umount_argv(cls, d):
- raise NotImplementedError
-
- def make_mount_argv(self, *a):
- raise NotImplementedError
-
- def cleanup_mntpt(self):
- pass
-
- def handle_mounter(self, po):
- po.wait()
-
- def inhibit(self, *a):
- """inhibit a gluster filesystem
-
- Mount glusterfs over a temporary mountpoint,
- change into the mount, and lazy unmount the
- filesystem.
- """
- mounted = False
- try:
- po = Popen(self.make_mount_argv(*a), **self.mountkw)
- self.handle_mounter(po)
- po.terminate_geterr()
- d = self.mntpt
- mounted = True
- logging.debug('auxiliary glusterfs mount in place')
- os.chdir(d)
- self.umount_l(d).terminate_geterr()
- mounted = False
- finally:
- try:
- if mounted:
- self.umount_l(d).terminate_geterr(fail_on_err = False)
- self.cleanup_mntpt()
- except:
- logging.warn('stale mount possibly left behind on ' + d)
- logging.debug('auxiliary glusterfs mount prepared')
-
- class DirectMounter(Mounter):
- """mounter backend which calls mount(8), umount(8) directly"""
-
- mountkw = {'stderr': subprocess.PIPE}
- glusterprog = 'glusterfs'
-
- @staticmethod
- def make_umount_argv(d):
- return ['umount', '-l', d]
-
- def make_mount_argv(self):
- self.mntpt = tempfile.mkdtemp(prefix = 'gsyncd-aux-mount-')
- return [self.get_glusterprog()] + ['--' + p for p in self.params] + [self.mntpt]
-
- def cleanup_mntpt(self):
- os.rmdir(self.mntpt)
-
- class MountbrokerMounter(Mounter):
- """mounter backend using the mountbroker gluster service"""
-
- mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE}
- glusterprog = 'gluster'
-
- @classmethod
- def make_cli_argv(cls):
- return [cls.get_glusterprog()] + gconf.gluster_cli_options.split() + ['system::']
-
- @classmethod
- def make_umount_argv(cls, d):
- return cls.make_cli_argv() + ['umount', d, 'lazy']
-
- def make_mount_argv(self, label):
- return self.make_cli_argv() + \
- ['mount', label, 'user-map-root=' + syncdutils.getusername()] + self.params
-
- def handle_mounter(self, po):
- self.mntpt = po.stdout.readline()[:-1]
- po.stdout.close()
- sup(self, po)
- if po.returncode != 0:
- # if cli terminated with error due to being
- # refused by glusterd, what it put
- # out on stdout is a diagnostic message
- logging.error('glusterd answered: %s' % self.mntpt)
-
- def connect(self):
- """inhibit the resource beyond
-
- Choose mounting backend (direct or mountbroker),
- set up glusterfs parameters and perform the mount
- with given backend
- """
-
- label = getattr(gconf, 'mountbroker', None)
- if not label:
- uid = os.geteuid()
- if uid != 0:
- label = syncdutils.getusername(uid)
- mounter = label and self.MountbrokerMounter or self.DirectMounter
- params = gconf.gluster_params.split() + \
- (gconf.gluster_log_level and ['log-level=' + gconf.gluster_log_level] or []) + \
- ['log-file=' + gconf.gluster_log_file, 'volfile-server=' + self.host,
- 'volfile-id=' + self.volume, 'client-pid=-1']
- mounter(params).inhibit(*[l for l in [label] if l])
-
- def connect_remote(self, *a, **kw):
- sup(self, *a, **kw)
- self.slavedir = "/proc/%d/cwd" % self.server.pid()
-
- def service_loop(self, *args):
- """enter service loop
-
- - if slave given, instantiate GMaster and
- pass control to that instance, which implements
- master behavior
- - else do that's what's inherited
- """
- if args:
- GMaster(self, args[0]).crawl_loop()
- else:
- sup(self, *args)
-
- def rsync(self, files):
- return sup(self, files, self.slavedir)
-
-
-class SSH(AbstractUrl, SlaveRemote):
- """scheme class for ssh:// urls
-
- interface to remote slave on master side
- implementing an ssh based proxy
- """
-
- def __init__(self, path):
- self.remote_addr, inner_url = sup(self, path,
- '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ]))
- self.inner_rsc = parse_url(inner_url)
-
- def canonical_path(self):
- m = re.match('([^@]+)@(.+)', self.remote_addr)
- if m:
- u, h = m.groups()
- else:
- u, h = syncdutils.getusername(), self.remote_addr
- remote_addr = '@'.join([u, gethostbyname(h)])
- return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return False
-
- def start_fd_client(self, *a, **opts):
- """customizations for client startup
-
- - be a no-op if we are to daemonize (client startup is deferred
- to post-daemon stage)
- - determine target url for rsync after consulting server
- """
- if opts.get('deferred'):
- return a
- sup(self, *a)
- ityp = type(self.inner_rsc)
- if ityp == FILE:
- slavepath = self.inner_rsc.path
- elif ityp == GLUSTER:
- slavepath = "/proc/%d/cwd" % self.server.pid()
- else:
- raise NotImplementedError
- self.slaveurl = ':'.join([self.remote_addr, slavepath])
-
- def connect_remote(self, go_daemon=None):
- """connect to inner slave url through outer ssh url
-
- Wrap the connecting utility in ssh.
-
- Much care is put into daemonizing: in that case
- ssh is started before daemonization, but
- RePCe client is to be created after that (as ssh
- interactive password auth would be defeated by
- a daemonized ssh, while client should be present
- only in the final process). In that case the action
- is taken apart to two parts, this method is ivoked
- once pre-daemon, once post-daemon. Use @go_daemon
- to deiced what part to perform.
-
- [NB. ATM gluster product does not makes use of interactive
- authentication.]
- """
- if go_daemon == 'done':
- return self.start_fd_client(*self.fd_pair)
- gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'))
- deferred = go_daemon == 'postconn'
- ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred)
- if deferred:
- # send a message to peer so that we can wait for
- # the answer from which we know connection is
- # established and we can proceed with daemonization
- # (doing that too early robs the ssh passwd prompt...)
- # However, we'd better not start the RepceClient
- # before daemonization (that's not preserved properly
- # in daemon), we just do a an ad-hoc linear put/get.
- i, o = ret
- inf = os.fdopen(i)
- repce.send(o, None, '__repce_version__')
- select((inf,), (), ())
- repce.recv(inf)
- # hack hack hack: store a global reference to the file
- # to save it from getting GC'd which implies closing it
- gconf.permanent_handles.append(inf)
- self.fd_pair = (i, o)
- return 'should'
-
- def rsync(self, files):
- return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl)
diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py
deleted file mode 100644
index 59defa711..000000000
--- a/xlators/features/marker/utils/syncdaemon/syncdutils.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import os
-import sys
-import pwd
-import time
-import fcntl
-import shutil
-import logging
-from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, EINTR
-from signal import SIGTERM, SIGKILL
-from time import sleep
-import select as oselect
-from os import waitpid as owaitpid
-try:
- from cPickle import PickleError
-except ImportError:
- # py 3
- from pickle import PickleError
-
-from gconf import gconf
-
-try:
- # py 3
- from urllib import parse as urllib
-except ImportError:
- import urllib
-
-def escape(s):
- """the chosen flavor of string escaping, used all over
- to turn whatever data to creatable representation"""
- return urllib.quote_plus(s)
-
-def unescape(s):
- """inverse of .escape"""
- return urllib.unquote_plus(s)
-
-def norm(s):
- if s:
- return s.replace('-', '_')
-
-def update_file(path, updater, merger = lambda f: True):
- """update a file in a transaction-like manner"""
-
- fr = fw = None
- try:
- fd = os.open(path, os.O_CREAT|os.O_RDWR)
- try:
- fr = os.fdopen(fd, 'r+b')
- except:
- os.close(fd)
- raise
- fcntl.lockf(fr, fcntl.LOCK_EX)
- if not merger(fr):
- return
-
- tmpp = path + '.tmp.' + str(os.getpid())
- fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY)
- try:
- fw = os.fdopen(fd, 'wb', 0)
- except:
- os.close(fd)
- raise
- updater(fw)
- os.fsync(fd)
- os.rename(tmpp, path)
- finally:
- for fx in (fr, fw):
- if fx:
- fx.close()
-
-def grabfile(fname, content=None):
- """open @fname + contest for its fcntl lock
-
- @content: if given, set the file content to it
- """
- # damn those messy open() mode codes
- fd = os.open(fname, os.O_CREAT|os.O_RDWR)
- f = os.fdopen(fd, 'r+b', 0)
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- f.close()
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # cannot grab, it's taken
- return
- raise
- if content:
- try:
- f.truncate()
- f.write(content)
- except:
- f.close()
- raise
- gconf.permanent_handles.append(f)
- return f
-
-def grabpidfile(fname=None, setpid=True):
- """.grabfile customization for pid files"""
- if not fname:
- fname = gconf.pid_file
- content = None
- if setpid:
- content = str(os.getpid()) + '\n'
- return grabfile(fname, content=content)
-
-final_lock = Lock()
-
-def finalize(*a, **kw):
- """all those messy final steps we go trough upon termination
-
- Do away with pidfile, ssh control dir and logging.
- """
- final_lock.acquire()
- if getattr(gconf, 'pid_file', None):
- rm_pidf = gconf.pid_file_owned
- if gconf.cpid:
- # exit path from parent branch of daemonization
- rm_pidf = False
- while True:
- f = grabpidfile(setpid=False)
- if not f:
- # child has already taken over pidfile
- break
- if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
- # child has terminated
- rm_pidf = True
- break;
- time.sleep(0.1)
- if rm_pidf:
- try:
- os.unlink(gconf.pid_file)
- except:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- pass
- else:
- raise
- if gconf.ssh_ctl_dir and not gconf.cpid:
- shutil.rmtree(gconf.ssh_ctl_dir)
- if gconf.log_exit:
- logging.info("exiting.")
- sys.stdout.flush()
- sys.stderr.flush()
- os._exit(kw.get('exval', 0))
-
-def log_raise_exception(excont):
- """top-level exception handler
-
- Try to some fancy things to cover up we face with an error.
- Translate some weird sounding but well understood exceptions
- into human-friendly lingo
- """
- is_filelog = False
- for h in logging.getLogger().handlers:
- fno = getattr(getattr(h, 'stream', None), 'fileno', None)
- if fno and not os.isatty(fno()):
- is_filelog = True
-
- exc = sys.exc_info()[1]
- if isinstance(exc, SystemExit):
- excont.exval = exc.code or 0
- raise
- else:
- logtag = None
- if isinstance(exc, GsyncdError):
- if is_filelog:
- logging.error(exc.message)
- sys.stderr.write('failure: ' + exc.message + "\n")
- elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
- ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \
- exc.errno == EPIPE):
- logging.error('connection to peer is broken')
- if hasattr(gconf, 'transport'):
- gconf.transport.wait()
- gconf.transport.terminate_geterr()
- elif isinstance(exc, OSError) and exc.errno == ENOTCONN:
- logging.error('glusterfs session went down')
- else:
- logtag = "FAIL"
- if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
- logtag = "FULL EXCEPTION TRACE"
- if logtag:
- logging.exception(logtag + ": ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
- excont.exval = 1
- sys.exit(excont.exval)
-
-
-class FreeObject(object):
- """wildcard class for which any attribute can be set"""
-
- def __init__(self, **kw):
- for k,v in kw.items():
- setattr(self, k, v)
-
-class Thread(baseThread):
- """thread class flavor for gsyncd
-
- - always a daemon thread
- - force exit for whole program if thread
- function coughs up an exception
- """
- def __init__(self, *a, **kw):
- tf = kw.get('target')
- if tf:
- def twrap(*aa):
- excont = FreeObject(exval = 0)
- try:
- tf(*aa)
- except:
- try:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
- kw['target'] = twrap
- baseThread.__init__(self, *a, **kw)
- self.setDaemon(True)
-
-class GsyncdError(Exception):
- pass
-
-def getusername(uid = None):
- if uid == None:
- uid = os.geteuid()
- return pwd.getpwuid(uid).pw_name
-
-def boolify(s):
- """
- Generic string to boolean converter
-
- return
- - Quick return if string 's' is of type bool
- - True if it's in true_list
- - False if it's in false_list
- - Warn if it's not present in either and return False
- """
- true_list = ['true', 'yes', '1', 'on']
- false_list = ['false', 'no', '0', 'off']
-
- if isinstance(s, bool):
- return s
-
- rv = False
- lstr = s.lower()
- if lstr in true_list:
- rv = True
- elif not lstr in false_list:
- logging.warn("Unknown string (%s) in string to boolean conversion defaulting to False\n" % (s))
-
- return rv
-
-def eintr_wrap(func, exc, *a):
- """
- wrapper around syscalls resilient to interrupt caused
- by signals
- """
- while True:
- try:
- return func(*a)
- except exc, ex:
- if not ex[0] == EINTR:
- raise GsyncdError(ex[1])
-
-def select(*a):
- return eintr_wrap(oselect.select, oselect.error, *a)
-
-def waitpid (*a):
- return eintr_wrap(owaitpid, OSError, *a)
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
index 58cfed0f9..393a7bd08 100644
--- a/xlators/features/path-convertor/src/Makefile.am
+++ b/xlators/features/path-convertor/src/Makefile.am
@@ -2,13 +2,14 @@
xlator_LTLIBRARIES = path-converter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-path_converter_la_LDFLAGS = -module -avoidversion
+path_converter_la_LDFLAGS = -module -avoid-version
path_converter_la_SOURCES = path.c
path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h
index c071513b6..77ada8d53 100644
--- a/xlators/features/path-convertor/src/path-mem-types.h
+++ b/xlators/features/path-convertor/src/path-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __PATH_MEM_TYPES_H__
#define __PATH_MEM_TYPES_H__
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
index f61630535..5c52e0a8d 100644
--- a/xlators/features/path-convertor/src/path.c
+++ b/xlators/features/path-convertor/src/path.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* TODO: add gf_log to all the cases returning errors */
#ifndef _CONFIG_H
@@ -52,7 +42,7 @@ static char *
name_this_to_that (xlator_t *xl, const char *path, const char *name)
{
path_private_t *priv = xl->private;
- char priv_path[ZR_PATH_MAX] = {0,};
+ char priv_path[PATH_MAX] = {0,};
char *tmp_name = NULL;
int32_t path_len = strlen (path);
int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
@@ -848,8 +838,7 @@ path_setxattr (call_frame_t *frame,
if (tmp_path != loc_path)
GF_FREE (tmp_path);
- if (tmp_name)
- GF_FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
diff --git a/xlators/protocol/legacy/client/Makefile.am b/xlators/features/protect/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/protocol/legacy/client/Makefile.am
+++ b/xlators/features/protect/Makefile.am
diff --git a/xlators/features/protect/src/Makefile.am b/xlators/features/protect/src/Makefile.am
new file mode 100644
index 000000000..968e88c45
--- /dev/null
+++ b/xlators/features/protect/src/Makefile.am
@@ -0,0 +1,21 @@
+xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+prot_dht_la_LDFLAGS = -module -avoid-version
+prot_dht_la_SOURCES = prot_dht.c
+prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_client_la_LDFLAGS = -module -avoid-version
+prot_client_la_SOURCES = prot_client.c
+prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_server_la_LDFLAGS = -module -avoid-version
+prot_server_la_SOURCES = prot_server.c
+prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c
new file mode 100644
index 000000000..d09715067
--- /dev/null
+++ b/xlators/features/protect/src/prot_client.c
@@ -0,0 +1,217 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+#ifndef __NetBSD__
+#include <execinfo.h>
+#endif
+
+#define NUM_FRAMES 20
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+enum {
+ PROT_ACT_NONE = 0,
+ PROT_ACT_LOG,
+ PROT_ACT_REJECT,
+};
+
+void
+pcli_print_trace (char *name, call_frame_t *frame)
+{
+ void *frames[NUM_FRAMES];
+ char **symbols;
+ int size;
+ int i;
+
+ gf_log (name, GF_LOG_INFO, "Translator stack:");
+ while (frame) {
+ gf_log (name, GF_LOG_INFO, "%s (%s)",
+ frame->wind_from, frame->this->name);
+ frame = frame->next;
+ }
+
+ size = backtrace(frames,NUM_FRAMES);
+ if (size <= 0) {
+ return;
+ }
+ symbols = backtrace_symbols(frames,size);
+ if (!symbols) {
+ return;
+ }
+
+ gf_log(name, GF_LOG_INFO, "Processor stack:");
+ for (i = 0; i < size; ++i) {
+ gf_log (name, GF_LOG_INFO, "%s", symbols[i]);
+ }
+ free(symbols);
+}
+
+int32_t
+pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ uint64_t value;
+
+ if (newloc->parent == oldloc->parent) {
+ gf_log (this->name, GF_LOG_DEBUG, "rename in same directory");
+ goto simple_unwind;
+ }
+ if (!oldloc->parent) {
+ goto simple_unwind;
+ }
+ if (inode_ctx_get(oldloc->parent,this,&value) != 0) {
+ goto simple_unwind;
+ }
+
+ if (value != PROT_ACT_NONE) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "got rename for protected %s", oldloc->path);
+ pcli_print_trace(this->name,frame->next);
+ if (value == PROT_ACT_REJECT) {
+ STACK_UNWIND_STRICT (rename, frame, -1, EPERM,
+ NULL, NULL, NULL, NULL, NULL,
+ xdata);
+ return 0;
+ }
+ }
+
+simple_unwind:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+int32_t
+pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ data_t *data;
+ uint64_t value;
+
+ /*
+ * We can't use dict_get_str and strcmp here, because the value comes
+ * directly from the user and might not be NUL-terminated (it would
+ * be if we had set it ourselves.
+ */
+
+ data = dict_get(dict,PROTECT_KEY);
+ if (!data) {
+ goto simple_wind;
+ }
+
+ if (dict->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "attempted to mix %s with other keys", PROTECT_KEY);
+ goto simple_wind;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY);
+ if (!strncmp(data->data,"log",data->len)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "logging removals on %s", loc->path);
+ value = PROT_ACT_LOG;
+ }
+ else if (!strncmp(data->data,"reject",data->len)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rejecting removals on %s", loc->path);
+ value = PROT_ACT_REJECT;
+ }
+ else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "removing protection on %s", loc->path);
+ value = PROT_ACT_NONE;
+ }
+ /* Right now the value doesn't matter - just the presence. */
+ if (inode_ctx_set(loc->inode,this,&value) != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set protection status for %s", loc->path);
+ }
+ STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL);
+ return 0;
+
+simple_wind:
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ uint64_t value;
+
+ if (!loc->parent || (inode_ctx_get(loc->parent,this,&value) != 0)) {
+ goto simple_unwind;
+ }
+
+ if (value != PROT_ACT_NONE) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "got unlink for protected %s", loc->path);
+ pcli_print_trace(this->name,frame->next);
+ if (value == PROT_ACT_REJECT) {
+ STACK_UNWIND_STRICT (unlink, frame, -1, EPERM,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ }
+
+simple_unwind:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .rename = pcli_rename,
+ .setxattr = pcli_setxattr,
+ .unlink = pcli_unlink,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c
new file mode 100644
index 000000000..feec6ffd6
--- /dev/null
+++ b/xlators/features/protect/src/prot_dht.c
@@ -0,0 +1,168 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+enum gf_pdht_mem_types_ {
+ gf_pdht_mt_coord_t = gf_common_mt_end + 1,
+ gf_pdht_mt_end
+};
+
+typedef struct {
+ pthread_mutex_t lock;
+ uint16_t refs;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xdata;
+} pdht_coord_t;
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+void
+pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this,
+ pdht_coord_t *coord)
+{
+ gf_boolean_t should_unwind;
+
+ should_unwind = (--(coord->refs) == 0);
+ pthread_mutex_unlock(&coord->lock);
+
+ if (should_unwind) {
+ STACK_UNWIND_STRICT (setxattr, frame,
+ coord->op_ret, coord->op_errno,
+ coord->xdata);
+ if (coord->xdata) {
+ dict_unref(coord->xdata);
+ }
+ GF_FREE(coord);
+ }
+}
+
+int32_t
+pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ pdht_coord_t *coord = cookie;
+
+ pthread_mutex_lock(&coord->lock);
+ if (op_ret) {
+ coord->op_ret = op_ret;
+ coord->op_errno = op_errno;
+ }
+ if (xdata) {
+ if (coord->xdata) {
+ dict_unref(coord->xdata);
+ }
+ coord->xdata = dict_ref(xdata);
+ }
+ pdht_unref_and_unlock(frame,this,coord);
+
+ return 0;
+}
+
+void
+pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord)
+{
+ xlator_list_t *iter;
+
+ if (!strcmp(xl->type,"features/prot_client")) {
+ pthread_mutex_lock(&coord->lock);
+ ++(coord->refs);
+ pthread_mutex_unlock(&coord->lock);
+ STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl,
+ xl->fops->setxattr, loc, dict, flags, xdata);
+ }
+
+ else for (iter = xl->children; iter; iter = iter->next) {
+ pdht_recurse (frame, this, loc, dict, flags, xdata,
+ iter->xlator, coord);
+ }
+}
+
+int32_t
+pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ pdht_coord_t *coord;
+
+ if (!dict_get(dict,PROTECT_KEY)) {
+ goto simple_wind;
+ }
+
+ if (dict->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "attempted to mix %s with other keys", PROTECT_KEY);
+ goto simple_wind;
+ }
+
+ coord = GF_CALLOC(1,sizeof(*coord),gf_pdht_mt_coord_t);
+ if (!coord) {
+ gf_log (this->name, GF_LOG_WARNING, "allocation failed");
+ goto simple_wind;
+ }
+
+ pthread_mutex_init(&coord->lock,NULL);
+ coord->refs = 1;
+ coord->op_ret = 0;
+ coord->xdata = NULL;
+
+ pdht_recurse(frame,this,loc,dict,flags,xdata,this,coord);
+ pthread_mutex_lock(&coord->lock);
+ pdht_unref_and_unlock(frame,this,coord);
+
+ return 0;
+
+simple_wind:
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+ .setxattr = pdht_setxattr,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c
new file mode 100644
index 000000000..beaee0889
--- /dev/null
+++ b/xlators/features/protect/src/prot_server.c
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/features/qemu-block/Makefile.am
index af437a64d..af437a64d 100644
--- a/xlators/performance/stat-prefetch/Makefile.am
+++ b/xlators/features/qemu-block/Makefile.am
diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am
new file mode 100644
index 000000000..08a7b62a0
--- /dev/null
+++ b/xlators/features/qemu-block/src/Makefile.am
@@ -0,0 +1,155 @@
+if ENABLE_QEMU_BLOCK
+xlator_LTLIBRARIES = qemu-block.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+qemu_block_la_LDFLAGS = -module -avoid-version
+qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GLIB_LIBS) -lz -lrt
+
+qemu_block_la_SOURCES_qemu = \
+ $(CONTRIBDIR)/qemu/qemu-coroutine.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \
+ $(CONTRIBDIR)/qemu/coroutine-ucontext.c \
+ $(CONTRIBDIR)/qemu/block.c \
+ $(CONTRIBDIR)/qemu/nop-symbols.c
+
+qemu_block_la_SOURCES_qemu_util = \
+ $(CONTRIBDIR)/qemu/util/aes.c \
+ $(CONTRIBDIR)/qemu/util/bitmap.c \
+ $(CONTRIBDIR)/qemu/util/bitops.c \
+ $(CONTRIBDIR)/qemu/util/cutils.c \
+ $(CONTRIBDIR)/qemu/util/error.c \
+ $(CONTRIBDIR)/qemu/util/hbitmap.c \
+ $(CONTRIBDIR)/qemu/util/iov.c \
+ $(CONTRIBDIR)/qemu/util/module.c \
+ $(CONTRIBDIR)/qemu/util/oslib-posix.c \
+ $(CONTRIBDIR)/qemu/util/qemu-option.c \
+ $(CONTRIBDIR)/qemu/util/qemu-error.c \
+ $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \
+ $(CONTRIBDIR)/qemu/util/unicode.c \
+ $(CONTRIBDIR)/qemu/util/hexdump.c
+
+qemu_block_la_SOURCES_qemu_block = \
+ $(CONTRIBDIR)/qemu/block/snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2.c \
+ $(CONTRIBDIR)/qemu/block/qed-check.c \
+ $(CONTRIBDIR)/qemu/block/qed-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qed-gencb.c \
+ $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qed-table.c \
+ $(CONTRIBDIR)/qemu/block/qed.c
+
+qemu_block_la_SOURCES_qemu_qobject = \
+ $(CONTRIBDIR)/qemu/qobject/json-lexer.c \
+ $(CONTRIBDIR)/qemu/qobject/json-parser.c \
+ $(CONTRIBDIR)/qemu/qobject/json-streamer.c \
+ $(CONTRIBDIR)/qemu/qobject/qbool.c \
+ $(CONTRIBDIR)/qemu/qobject/qdict.c \
+ $(CONTRIBDIR)/qemu/qobject/qerror.c \
+ $(CONTRIBDIR)/qemu/qobject/qfloat.c \
+ $(CONTRIBDIR)/qemu/qobject/qint.c \
+ $(CONTRIBDIR)/qemu/qobject/qjson.c \
+ $(CONTRIBDIR)/qemu/qobject/qlist.c \
+ $(CONTRIBDIR)/qemu/qobject/qstring.c
+
+qemu_block_la_SOURCES = \
+ $(qemu_block_la_SOURCES_qemu) \
+ $(qemu_block_la_SOURCES_qemu_util) \
+ $(qemu_block_la_SOURCES_qemu_block) \
+ $(qemu_block_la_SOURCES_qemu_qobject) \
+ bdrv-xlator.c \
+ coroutine-synctask.c \
+ bh-syncop.c \
+ monitor-logging.c \
+ clock-timer.c \
+ qemu-block.c \
+ qb-coroutines.c
+
+noinst_HEADERS_qemu = \
+ $(CONTRIBDIR)/qemu/config-host.h \
+ $(CONTRIBDIR)/qemu/qapi-types.h \
+ $(CONTRIBDIR)/qemu/qmp-commands.h \
+ $(CONTRIBDIR)/qemu/trace/generated-tracers.h \
+ $(CONTRIBDIR)/qemu/include/config.h \
+ $(CONTRIBDIR)/qemu/include/glib-compat.h \
+ $(CONTRIBDIR)/qemu/include/qemu-common.h \
+ $(CONTRIBDIR)/qemu/include/trace.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+ $(CONTRIBDIR)/qemu/include/block/aio.h \
+ $(CONTRIBDIR)/qemu/include/block/block.h \
+ $(CONTRIBDIR)/qemu/include/block/block_int.h \
+ $(CONTRIBDIR)/qemu/include/block/blockjob.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \
+ $(CONTRIBDIR)/qemu/include/block/snapshot.h \
+ $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \
+ $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \
+ $(CONTRIBDIR)/qemu/include/exec/poison.h \
+ $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \
+ $(CONTRIBDIR)/qemu/include/migration/migration.h \
+ $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \
+ $(CONTRIBDIR)/qemu/include/migration/vmstate.h \
+ $(CONTRIBDIR)/qemu/include/monitor/monitor.h \
+ $(CONTRIBDIR)/qemu/include/monitor/readline.h \
+ $(CONTRIBDIR)/qemu/include/qapi/error.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \
+ $(CONTRIBDIR)/qemu/include/qemu/aes.h \
+ $(CONTRIBDIR)/qemu/include/qemu/atomic.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bitops.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bswap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/compiler.h \
+ $(CONTRIBDIR)/qemu/include/qemu/error-report.h \
+ $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \
+ $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \
+ $(CONTRIBDIR)/qemu/include/qemu/iov.h \
+ $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \
+ $(CONTRIBDIR)/qemu/include/qemu/module.h \
+ $(CONTRIBDIR)/qemu/include/qemu/notify.h \
+ $(CONTRIBDIR)/qemu/include/qemu/option.h \
+ $(CONTRIBDIR)/qemu/include/qemu/option_int.h \
+ $(CONTRIBDIR)/qemu/include/qemu/osdep.h \
+ $(CONTRIBDIR)/qemu/include/qemu/queue.h \
+ $(CONTRIBDIR)/qemu/include/qemu/sockets.h \
+ $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \
+ $(CONTRIBDIR)/qemu/include/qemu/thread.h \
+ $(CONTRIBDIR)/qemu/include/qemu/timer.h \
+ $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \
+ $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \
+ $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \
+ $(CONTRIBDIR)/qemu/block/qcow2.h \
+ $(CONTRIBDIR)/qemu/block/qed.h
+
+noinst_HEADERS = \
+ $(noinst_HEADERS_qemu) \
+ qemu-block.h \
+ qemu-block-memory-types.h \
+ qb-coroutines.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/qemu \
+ -I$(CONTRIBDIR)/qemu/include \
+ -DGLUSTER_XLATOR
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c
new file mode 100644
index 000000000..1e55b5fb7
--- /dev/null
+++ b/xlators/features/qemu-block/src/bdrv-xlator.c
@@ -0,0 +1,389 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "inode.h"
+#include "syncop.h"
+#include "qemu-block.h"
+#include "block/block_int.h"
+
+typedef struct BDRVGlusterState {
+ inode_t *inode;
+} BDRVGlusterState;
+
+static QemuOptsList runtime_opts = {
+ .name = "gluster",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ .help = "GFID of file",
+ },
+ { /* end of list */ }
+ },
+};
+
+inode_t *
+qb_inode_from_filename (const char *filename)
+{
+ const char *iptr = NULL;
+ inode_t *inode = NULL;
+
+ iptr = filename + 17;
+ sscanf (iptr, "%p", &inode);
+
+ return inode;
+}
+
+
+int
+qb_inode_to_filename (inode_t *inode, char *filename, int size)
+{
+ return snprintf (filename, size, "gluster://inodep:%p", inode);
+}
+
+
+static fd_t *
+fd_from_bs (BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ return fd_anonymous (s->inode);
+}
+
+
+static int
+qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags)
+{
+ inode_t *inode = NULL;
+ BDRVGlusterState *s = bs->opaque;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ const char *filename = NULL;
+ char gfid_str[128];
+ int ret;
+ qb_conf_t *conf = THIS->private;
+
+ opts = qemu_opts_create_nofail(&runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -EINVAL;
+ }
+
+ filename = qemu_opt_get(opts, "filename");
+
+ /*
+ * gfid:<gfid> format means we're opening a backing image.
+ */
+ ret = sscanf(filename, "gluster://gfid:%s", gfid_str);
+ if (ret) {
+ loc_t loc = {0,};
+ struct iatt buf = {0,};
+ uuid_t gfid;
+
+ uuid_parse(gfid_str, gfid);
+
+ loc.inode = inode_find(conf->root_inode->table, gfid);
+ if (!loc.inode) {
+ loc.inode = inode_new(conf->root_inode->table);
+ uuid_copy(loc.inode->gfid, gfid);
+ }
+
+ uuid_copy(loc.gfid, loc.inode->gfid);
+ ret = syncop_lookup(FIRST_CHILD(THIS), &loc, NULL, &buf, NULL,
+ NULL);
+ if (ret) {
+ loc_wipe(&loc);
+ return ret;
+ }
+
+ s->inode = inode_ref(loc.inode);
+ loc_wipe(&loc);
+ } else {
+ inode = qb_inode_from_filename (filename);
+ if (!inode)
+ return -EINVAL;
+
+ s->inode = inode_ref(inode);
+ }
+
+ return 0;
+}
+
+
+static int
+qemu_gluster_create (const char *filename, QEMUOptionParameter *options)
+{
+ uint64_t total_size = 0;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ struct iatt stat = {0, };
+ int ret = 0;
+
+ inode = qb_inode_from_filename (filename);
+ if (!inode)
+ return -EINVAL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
+ }
+ options++;
+ }
+
+ fd = fd_anonymous (inode);
+ if (!fd)
+ return -ENOMEM;
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat);
+ if (ret) {
+ fd_unref (fd);
+ return ret;
+ }
+
+ if (stat.ia_size) {
+ /* format ONLY if the filesize is 0 bytes */
+ fd_unref (fd);
+ return -EFBIG;
+ }
+
+ if (total_size) {
+ ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size);
+ if (ret) {
+ fd_unref (fd);
+ return ret;
+ }
+ }
+
+ fd_unref (fd);
+ return 0;
+}
+
+
+static int
+qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t size = 0;
+ struct iovec *iov = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+ if (!fd)
+ return -EIO;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0,
+ &iov, &count, &iobref);
+ if (ret < 0)
+ goto out;
+
+ iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */
+
+out:
+ GF_FREE (iov);
+ if (iobref)
+ iobref_unref (iobref);
+ fd_unref (fd);
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t size = 0;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {0, };
+ int ret = -ENOMEM;
+
+ fd = fd_from_bs (bs);
+ if (!fd)
+ return -EIO;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0);
+
+out:
+ if (iobuf)
+ iobuf_unref (iobuf);
+ if (iobref)
+ iobref_unref (iobref);
+ fd_unref (fd);
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_flush (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_flush (FIRST_CHILD(THIS), fd);
+
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_fsync (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0);
+
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+static int
+qemu_gluster_truncate (BlockDriverState *bs, int64_t offset)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset);
+
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+static int64_t
+qemu_gluster_getlength (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+ struct iatt iatt = {0, };
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+ if (ret < 0)
+ return -1;
+
+ return iatt.ia_size;
+}
+
+
+static int64_t
+qemu_gluster_allocated_file_size (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+ struct iatt iatt = {0, };
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+ if (ret < 0)
+ return -1;
+
+ return iatt.ia_blocks * 512;
+}
+
+
+static void
+qemu_gluster_close (BlockDriverState *bs)
+{
+ BDRVGlusterState *s = NULL;
+
+ s = bs->opaque;
+
+ inode_unref (s->inode);
+
+ return;
+}
+
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+
+
+static BlockDriver bdrv_gluster = {
+ .format_name = "gluster",
+ .protocol_name = "gluster",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_co_readv = qemu_gluster_co_readv,
+ .bdrv_co_writev = qemu_gluster_co_writev,
+ .bdrv_co_flush_to_os = qemu_gluster_co_flush,
+ .bdrv_co_flush_to_disk = qemu_gluster_co_fsync,
+ .bdrv_truncate = qemu_gluster_truncate,
+ .create_options = qemu_gluster_create_options,
+};
+
+
+static void bdrv_gluster_init(void)
+{
+ bdrv_register(&bdrv_gluster);
+}
+
+
+block_init(bdrv_gluster_init);
diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c
new file mode 100644
index 000000000..e8686f6d4
--- /dev/null
+++ b/xlators/features/qemu-block/src/bh-syncop.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/aio.h"
+
+void
+qemu_bh_schedule (QEMUBH *bh)
+{
+ return;
+}
+
+void
+qemu_bh_cancel (QEMUBH *bh)
+{
+ return;
+}
+
+void
+qemu_bh_delete (QEMUBH *bh)
+{
+
+}
+
+QEMUBH *
+qemu_bh_new (QEMUBHFunc *cb, void *opaque)
+{
+ return NULL;
+}
diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c
new file mode 100644
index 000000000..fcbec6ad1
--- /dev/null
+++ b/xlators/features/qemu-block/src/clock-timer.c
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu/timer.h"
+
+QEMUClock *vm_clock;
+int use_rt_clock = 0;
+
+QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale,
+ QEMUTimerCB *cb, void *opaque)
+{
+ return NULL;
+}
+
+int64_t qemu_get_clock_ns (QEMUClock *clock)
+{
+ return 0;
+}
+
+void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time)
+{
+ return;
+}
+
+void qemu_free_timer (QEMUTimer *ts)
+{
+
+}
+
+void qemu_del_timer (QEMUTimer *ts)
+{
+
+}
+
+bool qemu_aio_wait()
+{
+ synctask_wake (synctask_get());
+ synctask_yield (synctask_get());
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c
new file mode 100644
index 000000000..e43988a95
--- /dev/null
+++ b/xlators/features/qemu-block/src/coroutine-synctask.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu-block.h"
+
+/*
+ * This code serves as the bridge from the main glusterfs context to the qemu
+ * coroutine context via synctask. We create a single threaded syncenv with a
+ * single synctask responsible for processing a queue of coroutines. The qemu
+ * code invoked from within the synctask function handlers uses the ucontext
+ * coroutine implementation and scheduling logic internal to qemu. This
+ * effectively donates a thread of execution to qemu and its internal coroutine
+ * management.
+ *
+ * NOTE: The existence of concurrent synctasks has proven quite racy with regard
+ * to qemu coroutine management, particularly related to the lifecycle
+ * differences with top-level synctasks and internally created coroutines and
+ * interactions with qemu-internal queues (and locks, in turn). We explicitly
+ * disallow this scenario, via the queue, until it is more well supported.
+ */
+
+static struct {
+ struct list_head queue;
+ gf_lock_t lock;
+ struct synctask *task;
+} qb_co;
+
+static void
+init_qbco()
+{
+ INIT_LIST_HEAD(&qb_co.queue);
+ LOCK_INIT(&qb_co.lock);
+}
+
+static int
+synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+
+static int
+qb_synctask_wrap (void *opaque)
+{
+ qb_local_t *qb_local, *tmp;
+
+ LOCK(&qb_co.lock);
+
+ while (!list_empty(&qb_co.queue)) {
+ list_for_each_entry_safe(qb_local, tmp, &qb_co.queue, list) {
+ list_del_init(&qb_local->list);
+ break;
+ }
+
+ UNLOCK(&qb_co.lock);
+
+ qb_local->synctask_fn(qb_local);
+ /* qb_local is now unwound and gone! */
+
+ LOCK(&qb_co.lock);
+ }
+
+ qb_co.task = NULL;
+
+ UNLOCK(&qb_co.lock);
+
+ return 0;
+}
+
+int
+qb_coroutine (call_frame_t *frame, synctask_fn_t fn)
+{
+ qb_local_t *qb_local = NULL;
+ qb_conf_t *qb_conf = NULL;
+ static int init = 0;
+
+ qb_local = frame->local;
+ qb_local->synctask_fn = fn;
+ qb_conf = frame->this->private;
+
+ if (!init) {
+ init = 1;
+ init_qbco();
+ }
+
+ LOCK(&qb_co.lock);
+
+ if (!qb_co.task)
+ qb_co.task = synctask_create(qb_conf->env, qb_synctask_wrap,
+ synctask_nop_cbk, frame, NULL);
+
+ list_add_tail(&qb_local->list, &qb_co.queue);
+
+ UNLOCK(&qb_co.lock);
+
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c
new file mode 100644
index 000000000..d37c37f0f
--- /dev/null
+++ b/xlators/features/qemu-block/src/monitor-logging.c
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/block_int.h"
+
+Monitor *cur_mon;
+
+int
+monitor_cur_is_qmp()
+{
+ /* No QMP support here */
+ return 0;
+}
+
+void
+monitor_set_error (Monitor *mon, QError *qerror)
+{
+ /* NOP here */
+ return;
+}
+
+
+void
+monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+ char buf[4096];
+
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", buf);
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c
new file mode 100644
index 000000000..974312f12
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.c
@@ -0,0 +1,667 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+int
+qb_format_and_resume (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ char filename[64];
+ char base_filename[128];
+ int use_base = 0;
+ qb_inode_t *qb_inode = NULL;
+ Error *local_err = NULL;
+ fd_t *fd = NULL;
+ dict_t *xattr = NULL;
+ qb_conf_t *qb_conf = NULL;
+ int ret = -1;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+ qb_conf = frame->this->private;
+
+ qb_inode_to_filename (inode, filename, 64);
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+ /*
+ * See if the caller specified a backing image.
+ */
+ if (!uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) {
+ loc_t loc = {0,};
+ char gfid_str[64];
+ struct iatt buf;
+
+ if (!uuid_is_null(qb_inode->backing_gfid)) {
+ loc.inode = inode_find(qb_conf->root_inode->table,
+ qb_inode->backing_gfid);
+ if (!loc.inode) {
+ loc.inode = inode_new(qb_conf->root_inode->table);
+ uuid_copy(loc.inode->gfid,
+ qb_inode->backing_gfid);
+ }
+ uuid_copy(loc.gfid, loc.inode->gfid);
+ } else if (qb_inode->backing_fname) {
+ loc.inode = inode_new(qb_conf->root_inode->table);
+ loc.name = qb_inode->backing_fname;
+ loc.parent = inode_parent(inode, NULL, NULL);
+ loc_path(&loc, loc.name);
+ }
+
+ /*
+ * Lookup the backing image. Verify existence and/or get the
+ * gfid if we don't already have it.
+ */
+ ret = syncop_lookup(FIRST_CHILD(frame->this), &loc, NULL, &buf,
+ NULL, NULL);
+ GF_FREE(qb_inode->backing_fname);
+ if (ret) {
+ loc_wipe(&loc);
+ ret = -ret;
+ goto err;
+ }
+
+ uuid_copy(qb_inode->backing_gfid, buf.ia_gfid);
+ loc_wipe(&loc);
+
+ /*
+ * We pass the filename of the backing image into the qemu block
+ * subsystem as the associated gfid. This is embedded into the
+ * clone image and passed along to the gluster bdrv backend when
+ * the block subsystem needs to operate on the backing image on
+ * behalf of the clone.
+ */
+ uuid_unparse(qb_inode->backing_gfid, gfid_str);
+ snprintf(base_filename, sizeof(base_filename),
+ "gluster://gfid:%s", gfid_str);
+ use_base = 1;
+ }
+
+ bdrv_img_create (filename, qb_inode->fmt,
+ use_base ? base_filename : NULL, 0, 0, qb_inode->size,
+ 0, &local_err, true);
+
+ if (error_is_set (&local_err)) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "%s",
+ error_get_pretty (local_err));
+ error_free (local_err);
+ QB_STUB_UNWIND (stub, -1, EIO);
+ return 0;
+ }
+
+ fd = fd_anonymous (inode);
+ if (!fd) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not create anonymous fd for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ xattr = dict_new ();
+ if (!xattr) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not allocate xattr dict for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ fd_unref (fd);
+ return 0;
+ }
+
+ ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not dict_set for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ fd_unref (fd);
+ dict_unref (xattr);
+ return 0;
+ }
+
+ ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to setxattr for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, -ret);
+ fd_unref (fd);
+ dict_unref (xattr);
+ return 0;
+ }
+
+ fd_unref (fd);
+ dict_unref (xattr);
+
+ QB_STUB_UNWIND (stub, 0, 0);
+
+ return 0;
+
+err:
+ QB_STUB_UNWIND(stub, -1, ret);
+ return 0;
+}
+
+
+static BlockDriverState *
+qb_bs_create (inode_t *inode, const char *fmt)
+{
+ char filename[64];
+ BlockDriverState *bs = NULL;
+ BlockDriver *drv = NULL;
+ int op_errno = 0;
+ int ret = 0;
+
+ bs = bdrv_new (uuid_utoa (inode->gfid));
+ if (!bs) {
+ op_errno = ENOMEM;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not allocate @bdrv for gfid:%s",
+ uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ drv = bdrv_find_format (fmt);
+ if (!drv) {
+ op_errno = EINVAL;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unknown file format: %s for gfid:%s",
+ fmt, uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ qb_inode_to_filename (inode, filename, 64);
+
+ ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to bdrv_open() gfid:%s (%s)",
+ uuid_utoa (inode->gfid), strerror (op_errno));
+ goto err;
+ }
+
+ return bs;
+err:
+ errno = op_errno;
+ return NULL;
+}
+
+
+int
+qb_co_open (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+ qb_inode->refcnt++;
+
+ QB_STUB_RESUME (stub);
+
+ return 0;
+}
+
+
+int
+qb_co_writev (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ QEMUIOVector qiov = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count);
+
+ ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_co_readv (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ if (stub->args.offset >= qb_inode->size) {
+ QB_STUB_UNWIND (stub, 0, 0);
+ return 0;
+ }
+
+ iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size);
+ if (!iobuf) {
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ iobuf_unref (iobuf);
+ return 0;
+ }
+
+ if (iobref_add (iobref, iobuf) < 0) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf),
+ stub->args.size);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ iobref_unref (iobref);
+ return 0;
+ }
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = ret;
+
+ stub->args_cbk.vector = iov_dup (&iov, 1);
+ stub->args_cbk.count = 1;
+ stub->args_cbk.iobref = iobref;
+
+ QB_STUB_UNWIND (stub, ret, 0);
+
+ return 0;
+}
+
+
+int
+qb_co_fsync (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_flush (qb_inode->bs);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+static void
+qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset)
+{
+ char val[QB_XATTR_VAL_MAX];
+ qb_conf_t *qb_conf = NULL;
+ dict_t *xattr = NULL;
+
+ qb_conf = this->private;
+
+ snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu",
+ fmt, (long long unsigned) offset);
+
+ xattr = dict_new ();
+ if (!xattr)
+ return;
+
+ if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) != 0) {
+ dict_unref (xattr);
+ return;
+ }
+
+ syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0);
+ dict_unref (xattr);
+}
+
+
+int
+qb_co_truncate (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+ off_t offset = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = syncop_fstat (FIRST_CHILD(this), local->fd,
+ &stub->args_cbk.prestat);
+ if (ret < 0)
+ goto out;
+ stub->args_cbk.prestat.ia_size = qb_inode->size;
+
+ ret = bdrv_truncate (qb_inode->bs, stub->args.offset);
+ if (ret < 0)
+ goto out;
+
+ offset = bdrv_getlength (qb_inode->bs);
+
+ qb_inode->size = offset;
+
+ ret = syncop_fstat (FIRST_CHILD(this), local->fd,
+ &stub->args_cbk.poststat);
+ if (ret < 0)
+ goto out;
+ stub->args_cbk.poststat.ia_size = qb_inode->size;
+
+ qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size);
+
+out:
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_co_close (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ BlockDriverState *bs = NULL;
+
+ local = opaque;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (THIS, inode);
+
+ if (!--qb_inode->refcnt) {
+ bs = qb_inode->bs;
+ qb_inode->bs = NULL;
+ bdrv_delete (bs);
+ }
+
+ frame = local->frame;
+ frame->local = NULL;
+ qb_local_free (THIS, local);
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+
+int
+qb_snapshot_create (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ QEMUSnapshotInfo sn;
+ struct timeval tv = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ memset (&sn, 0, sizeof (sn));
+ pstrcpy (sn.name, sizeof(sn.name), local->name);
+ gettimeofday (&tv, NULL);
+ sn.date_sec = tv.tv_sec;
+ sn.date_nsec = tv.tv_usec * 1000;
+
+ ret = bdrv_snapshot_create (qb_inode->bs, &sn);
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_snapshot_delete (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_snapshot_delete (qb_inode->bs, local->name);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_snapshot_goto (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_snapshot_goto (qb_inode->bs, local->name);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.h b/xlators/features/qemu-block/src/qb-coroutines.h
new file mode 100644
index 000000000..583319f3b
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QB_COROUTINES_H
+#define __QB_COROUTINES_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+int qb_format_and_resume (void *opaque);
+int qb_snapshot_create (void *opaque);
+int qb_snapshot_delete (void *opaque);
+int qb_snapshot_goto (void *opaque);
+int qb_co_open (void *opaque);
+int qb_co_close (void *opaque);
+int qb_co_writev (void *opaque);
+int qb_co_readv (void *opaque);
+int qb_co_fsync (void *opaque);
+int qb_co_truncate (void *opaque);
+
+#endif /* __QB_COROUTINES_H */
diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h
new file mode 100644
index 000000000..267b3893f
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block-memory-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __QB_MEM_TYPES_H__
+#define __QB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_qb_mem_types_ {
+ gf_qb_mt_qb_conf_t = gf_common_mt_end + 1,
+ gf_qb_mt_qb_inode_t,
+ gf_qb_mt_qb_local_t,
+ gf_qb_mt_coroutinesynctask_t,
+ gf_qb_mt_end
+};
+#endif
+
diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c
new file mode 100644
index 000000000..48bbf3140
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.c
@@ -0,0 +1,1140 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+qb_inode_t *
+__qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ qb_inode_t *qb_inode = NULL;
+
+ __inode_ctx_get (inode, this, &value);
+ qb_inode = (qb_inode_t *)(unsigned long) value;
+
+ return qb_inode;
+}
+
+
+qb_inode_t *
+qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ LOCK (&inode->lock);
+ {
+ qb_inode = __qb_inode_ctx_get (this, inode);
+ }
+ UNLOCK (&inode->lock);
+
+ return qb_inode;
+}
+
+
+qb_inode_t *
+qb_inode_ctx_del (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ qb_inode_t *qb_inode = NULL;
+
+ inode_ctx_del (inode, this, &value);
+ qb_inode = (qb_inode_t *)(unsigned long) value;
+
+ return qb_inode;
+}
+
+
+int
+qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_del (this, inode);
+
+ if (!qb_inode)
+ return 0;
+
+ if (warn)
+ gf_log (this->name, GF_LOG_WARNING,
+ "inode %s no longer block formatted",
+ uuid_utoa (inode->gfid));
+
+ /* free (qb_inode->bs); */
+
+ GF_FREE (qb_inode);
+
+ return 0;
+}
+
+
+int
+qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, inode);
+ if (!qb_inode)
+ return 0;
+
+ iatt->ia_size = qb_inode->size;
+
+ return 0;
+}
+
+
+int
+qb_format_extract (xlator_t *this, char *format, inode_t *inode)
+{
+ char *s, *save;
+ uint64_t size = 0;
+ char fmt[QB_XATTR_VAL_MAX+1] = {0, };
+ qb_inode_t *qb_inode = NULL;
+ char *formatstr = NULL;
+ uuid_t gfid = {0,};
+ char gfid_str[64] = {0,};
+ int ret;
+
+ strncpy(fmt, format, QB_XATTR_VAL_MAX);
+
+ s = strtok_r(fmt, ":", &save);
+ if (!s)
+ goto invalid;
+ formatstr = gf_strdup(s);
+
+ s = strtok_r(NULL, ":", &save);
+ if (!s)
+ goto invalid;
+ if (gf_string2bytesize (s, &size))
+ goto invalid;
+ if (!size)
+ goto invalid;
+
+ s = strtok_r(NULL, "\0", &save);
+ if (s && !strncmp(s, "<gfid:", strlen("<gfid:"))) {
+ /*
+ * Check for valid gfid backing image specifier.
+ */
+ if (strlen(s) + 1 > sizeof(gfid_str))
+ goto invalid;
+ ret = sscanf(s, "<gfid:%[^>]s", gfid_str);
+ if (ret == 1) {
+ ret = uuid_parse(gfid_str, gfid);
+ if (ret < 0)
+ goto invalid;
+ }
+ }
+
+ qb_inode = qb_inode_ctx_get (this, inode);
+ if (!qb_inode)
+ qb_inode = GF_CALLOC (1, sizeof (*qb_inode),
+ gf_qb_mt_qb_inode_t);
+ if (!qb_inode) {
+ GF_FREE(formatstr);
+ return ENOMEM;
+ }
+
+ strncpy(qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX);
+ qb_inode->size = size;
+
+ /*
+ * If a backing gfid was not specified, interpret any remaining bytes
+ * associated with a backing image as a filename local to the parent
+ * directory. The format processing will validate further.
+ */
+ if (!uuid_is_null(gfid))
+ uuid_copy(qb_inode->backing_gfid, gfid);
+ else if (s)
+ qb_inode->backing_fname = gf_strdup(s);
+
+ inode_ctx_set (inode, this, (void *)&qb_inode);
+
+ GF_FREE(formatstr);
+
+ return 0;
+
+invalid:
+ GF_FREE(formatstr);
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "invalid format '%s' in inode %s", format,
+ uuid_utoa (inode->gfid));
+ return EINVAL;
+}
+
+
+void
+qb_local_free (xlator_t *this, qb_local_t *local)
+{
+ if (local->inode)
+ inode_unref (local->inode);
+ if (local->fd)
+ fd_unref (local->fd);
+ GF_FREE (local);
+}
+
+
+int
+qb_local_init (call_frame_t *frame)
+{
+ qb_local_t *qb_local = NULL;
+
+ qb_local = GF_CALLOC (1, sizeof (*qb_local), gf_qb_mt_qb_local_t);
+ if (!qb_local)
+ return -1;
+ INIT_LIST_HEAD(&qb_local->list);
+
+ qb_local->frame = frame;
+ frame->local = qb_local;
+
+ return 0;
+}
+
+
+int
+qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ char *format = NULL;
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+ /*
+ * Cache the root inode for dealing with backing images. The format
+ * coroutine and the gluster qemu backend driver both use the root inode
+ * table to verify and/or redirect I/O to the backing image via
+ * anonymous fd's.
+ */
+ if (!conf->root_inode && __is_root_gfid(inode->gfid))
+ conf->root_inode = inode_ref(inode);
+
+ if (!xdata)
+ goto out;
+
+ if (dict_get_str (xdata, conf->qb_xattr_key, &format))
+ goto out;
+
+ if (!format) {
+ qb_inode_cleanup (this, inode, 1);
+ goto out;
+ }
+
+ op_errno = qb_format_extract (this, format, inode);
+ if (op_errno)
+ op_ret = -1;
+
+ qb_iatt_fixup (this, inode, buf);
+out:
+ QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+
+int
+qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata)
+ goto enomem;
+
+ if (dict_set_int32 (xdata, conf->qb_xattr_key, 0))
+ goto enomem;
+
+ STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref (xdata);
+ return 0;
+enomem:
+ QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0);
+ if (xdata)
+ dict_unref (xdata);
+ return 0;
+}
+
+
+int
+qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+ dict_t *xattr, inode_t *inode)
+{
+ char *format = NULL;
+ int op_errno = 0;
+ qb_local_t *qb_local = NULL;
+ data_t *data = NULL;
+ qb_inode_t *qb_inode;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-format"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ format = alloca (data->len + 1);
+ memcpy (format, data->data, data->len);
+ format[data->len] = 0;
+
+ op_errno = qb_format_extract (this, format, inode);
+ if (op_errno) {
+ QB_STUB_UNWIND (stub, -1, op_errno);
+ return 0;
+ }
+ qb_inode = qb_inode_ctx_get(this, inode);
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+
+ snprintf(qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%lu", qb_inode->fmt,
+ qb_inode->size);
+
+ qb_coroutine (frame, qb_format_and_resume);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_snapshot_create (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_create);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_delete);
+
+ return 0;
+}
+
+int
+qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_goto);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+ dict_t *xattr, inode_t *inode)
+{
+ data_t *data = NULL;
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-format"))) {
+ qb_setxattr_format (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) {
+ qb_setxattr_snapshot_create (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) {
+ qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) {
+ qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ QB_STUB_RESUME (stub);
+
+ return 0;
+}
+
+
+int
+qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, xattr,
+ flags, xdata);
+ if (!stub)
+ goto enomem;
+
+ qb_setxattr_common (frame, this, stub, xattr, loc->inode);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+
+int
+qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr,
+ flags, xdata);
+ if (!stub)
+ goto enomem;
+
+ qb_setxattr_common (frame, this, stub, xattr, fd->inode);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+
+int
+qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ qb_local_t *qb_local = NULL;
+
+ qb_local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (!qb_inode_ctx_get (this, qb_local->inode))
+ goto unwind;
+
+ stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno, fd, xdata);
+ if (!stub) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ qb_local->stub = stub;
+
+ qb_coroutine (frame, qb_co_open);
+
+ return 0;
+unwind:
+ QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+
+int
+qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, loc->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (loc->inode);
+ qb_local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+enomem:
+ QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0);
+ return 0;
+}
+
+
+int
+qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_writev_stub (frame, NULL, fd, vector, count,
+ offset, flags, iobref, xdata);
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_writev);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_readv_stub (frame, NULL, fd, size, offset,
+ flags, xdata);
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_readv);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_fsync);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_fsync);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+static int32_t
+qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ qb_conf_t *conf = this->private;
+ gf_dirent_t *entry;
+ char *format;
+
+ list_for_each_entry(entry, &entries->list, list) {
+ if (!entry->inode || !entry->dict)
+ continue;
+
+ format = NULL;
+ if (dict_get_str(entry->dict, conf->qb_xattr_key, &format))
+ continue;
+
+ if (!format) {
+ qb_inode_cleanup(this, entry->inode, 1);
+ continue;
+ }
+
+ if (qb_format_extract(this, format, entry->inode))
+ continue;
+
+ qb_iatt_fixup(this, entry->inode, &entry->d_stat);
+ }
+
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+static int32_t
+qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ qb_conf_t *conf = this->private;
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata)
+ goto enomem;
+
+ if (dict_set_int32 (xdata, conf->qb_xattr_key, 0))
+ goto enomem;
+
+ STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ dict_unref(xdata);
+ return 0;
+
+enomem:
+ QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
+}
+
+int
+qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, loc->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (loc->inode);
+ qb_local->fd = fd_anonymous (loc->inode);
+
+ qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_truncate);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_truncate);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, iatt);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata);
+
+ return 0;
+}
+
+int
+qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, loc->inode))
+ frame->local = inode_ref (loc->inode);
+
+ STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+
+int
+qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, iatt);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata);
+
+ return 0;
+}
+
+
+int
+qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, fd->inode))
+ frame->local = inode_ref (fd->inode);
+
+ STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+
+int
+qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, pre);
+ qb_iatt_fixup (this, inode, post);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+
+int
+qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int valid, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, loc->inode))
+ frame->local = inode_ref (loc->inode);
+
+ STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata);
+ return 0;
+}
+
+
+int
+qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, pre);
+ qb_iatt_fixup (this, inode, post);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+
+int
+qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int valid, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, fd->inode))
+ frame->local = inode_ref (fd->inode);
+
+ STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata);
+ return 0;
+}
+
+
+int
+qb_forget (xlator_t *this, inode_t *inode)
+{
+ return qb_inode_cleanup (this, inode, 0);
+}
+
+
+int
+qb_release (xlator_t *this, fd_t *fd)
+{
+ call_frame_t *frame = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate frame. "
+ "Leaking QEMU BlockDriverState");
+ return -1;
+ }
+
+ if (qb_local_init (frame) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate local. "
+ "Leaking QEMU BlockDriverState");
+ STACK_DESTROY (frame->root);
+ return -1;
+ }
+
+ if (qb_coroutine (frame, qb_co_close) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate coroutine. "
+ "Leaking QEMU BlockDriverState");
+ qb_local_free (this, frame->local);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+
+ return 0;
+}
+
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init "
+ "failed");
+ return ret;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ return 0;
+}
+
+
+int
+init (xlator_t *this)
+{
+ qb_conf_t *conf = NULL;
+ int32_t ret = -1;
+ static int bdrv_inited = 0;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: qemu-block (%s) not configured with exactly "
+ "one child", this->name);
+ goto out;
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t);
+ if (!conf)
+ goto out;
+
+ /* configure 'option window-size <size>' */
+ GF_OPTION_INIT ("default-password", conf->default_password, str, out);
+
+ /* qemu coroutines use "co_mutex" for synchronizing among themselves.
+ However "co_mutex" itself is not threadsafe if the coroutine framework
+ is multithreaded (which usually is not). However synctasks are
+ fundamentally multithreaded, so for now create a syncenv which has
+ scaling limits set to max 1 thread so that the qemu coroutines can
+ execute "safely".
+
+ Future work: provide an implementation of "co_mutex" which is
+ threadsafe and use the global multithreaded ctx->env syncenv.
+ */
+ conf->env = syncenv_new (0, 1, 1);
+
+ this->private = conf;
+
+ ret = 0;
+
+ snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT,
+ this->name);
+
+ cur_mon = (void *) 1;
+
+ if (!bdrv_inited) {
+ bdrv_init ();
+ bdrv_inited = 1;
+ }
+
+out:
+ if (ret)
+ GF_FREE (conf);
+
+ return ret;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ this->private = NULL;
+
+ if (conf->root_inode)
+ inode_unref(conf->root_inode);
+ GF_FREE (conf);
+
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = qb_lookup,
+ .fsetxattr = qb_fsetxattr,
+ .setxattr = qb_setxattr,
+ .open = qb_open,
+ .writev = qb_writev,
+ .readv = qb_readv,
+ .fsync = qb_fsync,
+ .truncate = qb_truncate,
+ .ftruncate = qb_ftruncate,
+ .stat = qb_stat,
+ .fstat = qb_fstat,
+ .setattr = qb_setattr,
+ .fsetattr = qb_fsetattr,
+ .flush = qb_flush,
+/*
+ .getxattr = qb_getxattr,
+ .fgetxattr = qb_fgetxattr
+*/
+ .readdirp = qb_readdirp,
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = qb_forget,
+ .release = qb_release,
+};
+
+
+struct xlator_dumpops dumpops = {
+};
+
+
+struct volume_options options[] = {
+ { .key = {"default-password"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Default password for the AES encrypted block images."
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h
new file mode 100644
index 000000000..c95f2799a
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.h
@@ -0,0 +1,109 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QEMU_BLOCK_H
+#define __QEMU_BLOCK_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which
+ indicates that the file must be "interpreted" by the block format
+ logic. The value of the key is of the pattern:
+
+ "format:virtual_size"
+
+ e.g
+
+ "qcow2:20GB" or "qed:100GB"
+
+ The format and virtual size are colon separated. The format is
+ a case sensitive string which qemu recognizes. virtual_size is
+ specified as a size which glusterfs recognizes as size (i.e.,
+ value accepted by gf_string2bytesize())
+*/
+#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format"
+
+#define QB_XATTR_KEY_MAX 64
+
+#define QB_XATTR_VAL_MAX 64
+
+
+typedef struct qb_inode {
+ char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */
+ size_t size; /* virtual size in bytes */
+ BlockDriverState *bs;
+ int refcnt;
+ uuid_t backing_gfid;
+ char *backing_fname;
+} qb_inode_t;
+
+
+typedef struct qb_conf {
+ Monitor *mon;
+ struct syncenv *env;
+ char qb_xattr_key[QB_XATTR_KEY_MAX];
+ char *default_password;
+ inode_t *root_inode;
+} qb_conf_t;
+
+
+typedef struct qb_local {
+ call_frame_t *frame; /* backpointer */
+ call_stub_t *stub;
+ inode_t *inode;
+ fd_t *fd;
+ char fmt[QB_XATTR_VAL_MAX+1];
+ char name[256];
+ synctask_fn_t synctask_fn;
+ struct list_head list;
+} qb_local_t;
+
+void qb_local_free (xlator_t *this, qb_local_t *local);
+int qb_coroutine (call_frame_t *frame, synctask_fn_t fn);
+inode_t *qb_inode_from_filename (const char *filename);
+int qb_inode_to_filename (inode_t *inode, char *filename, int size);
+int qb_format_extract (xlator_t *this, char *format, inode_t *inode);
+
+qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode);
+
+#define QB_STACK_UNWIND(typ, frame, args ...) do { \
+ qb_local_t *__local = frame->local; \
+ xlator_t *__this = frame->this; \
+ \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (typ, frame, args); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \
+ qb_local_t *__local = stub->frame->local; \
+ xlator_t *__this = stub->frame->this; \
+ \
+ stub->frame->local = NULL; \
+ call_unwind_error (stub, op_ret, op_errno); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#define QB_STUB_RESUME(stub_errno) do { \
+ qb_local_t *__local = stub->frame->local; \
+ xlator_t *__this = stub->frame->this; \
+ \
+ stub->frame->local = NULL; \
+ call_resume (stub); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#endif /* !__QEMU_BLOCK_H */
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
index e8ab4cb24..15e46629e 100644
--- a/xlators/features/quiesce/src/Makefile.am
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = quiesce.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quiesce_la_LDFLAGS = -module -avoidversion
+quiesce_la_LDFLAGS = -module -avoid-version
quiesce_la_SOURCES = quiesce.c
quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = quiesce.h quiesce-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 00f7aa4f1..6e582f424 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_MEM_TYPES_H__
#define __QUIESCE_MEM_TYPES_H__
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index 84df12a31..24c7dc6ed 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -41,16 +31,13 @@ gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
loc_wipe (&local->loc);
if (local->fd)
fd_unref (local->fd);
- if (local->name)
- GF_FREE (local->name);
- if (local->volname)
- GF_FREE (local->volname);
+ GF_FREE (local->name);
+ GF_FREE (local->volname);
if (local->dict)
dict_unref (local->dict);
if (local->iobref)
iobref_unref (local->iobref);
- if (local->vector)
- GF_FREE (local->vector);
+ GF_FREE (local->vector);
mem_put (local);
}
@@ -124,7 +111,7 @@ void
gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
{
quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ struct timespec timeout = {0,};
priv = this->private;
if (!priv) {
@@ -142,7 +129,7 @@ gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
if (!priv->timer) {
timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
priv->timer = gf_timer_call_after (this->ctx,
timeout,
@@ -191,7 +178,8 @@ out:
int32_t
quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -201,10 +189,10 @@ quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_stat_stub (frame, default_stat_resume,
- &local->loc);
+ &local->loc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -212,7 +200,7 @@ quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -221,7 +209,7 @@ out:
int32_t
quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -231,9 +219,9 @@ quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_access_stub (frame, default_access_resume,
- &local->loc, local->flag);
+ &local->loc, local->flag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -241,7 +229,7 @@ quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -251,7 +239,7 @@ out:
int32_t
quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -261,10 +249,10 @@ quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readlink_stub (frame, default_readlink_resume,
- &local->loc, local->size);
+ &local->loc, local->size, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -272,7 +260,7 @@ quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -281,7 +269,7 @@ out:
int32_t
quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -292,10 +280,10 @@ quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_open_stub (frame, default_open_resume,
&local->loc, local->flag, local->fd,
- local->wbflags);
+ xdata);
if (!stub) {
STACK_UNWIND_STRICT (open, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -303,7 +291,7 @@ quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -313,7 +301,7 @@ out:
int32_t
quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -323,10 +311,11 @@ quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readv_stub (frame, default_readv_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset,
+ local->io_flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
goto out;
}
@@ -335,7 +324,7 @@ quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref);
+ stbuf, iobref, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -344,7 +333,7 @@ out:
int32_t
quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -354,9 +343,9 @@ quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_flush_stub (frame, default_flush_resume,
- local->fd);
+ local->fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -364,7 +353,7 @@ quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -376,7 +365,7 @@ out:
int32_t
quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -386,10 +375,10 @@ quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsync_stub (frame, default_fsync_resume,
- local->fd, local->flag);
+ local->fd, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -397,7 +386,7 @@ quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -406,7 +395,7 @@ out:
int32_t
quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -416,10 +405,10 @@ quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fstat_stub (frame, default_fstat_resume,
- local->fd);
+ local->fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -427,7 +416,7 @@ quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -436,7 +425,7 @@ out:
int32_t
quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -446,10 +435,10 @@ quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_opendir_stub (frame, default_opendir_resume,
- &local->loc, local->fd);
+ &local->loc, local->fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -457,7 +446,7 @@ quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -466,7 +455,7 @@ out:
int32_t
quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -476,9 +465,9 @@ quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume,
- local->fd, local->flag);
+ local->fd, local->flag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -486,7 +475,7 @@ quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -495,7 +484,7 @@ out:
int32_t
quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -505,10 +494,10 @@ quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_statfs_stub (frame, default_statfs_resume,
- &local->loc);
+ &local->loc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -516,7 +505,7 @@ quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -525,7 +514,7 @@ out:
int32_t
quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -535,10 +524,10 @@ quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
- local->fd, local->name);
+ local->fd, local->name, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -546,7 +535,7 @@ quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -556,7 +545,7 @@ out:
int32_t
quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -566,10 +555,10 @@ quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_getxattr_stub (frame, default_getxattr_resume,
- &local->loc, local->name);
+ &local->loc, local->name, xdata);
if (!stub) {
STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -577,7 +566,7 @@ quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -588,7 +577,7 @@ out:
int32_t
quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum)
+ uint8_t *strong_checksum, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -598,10 +587,10 @@ quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- local->fd, local->offset, local->flag);
+ local->fd, local->offset, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM,
- 0, NULL);
+ 0, NULL, NULL);
goto out;
}
@@ -610,7 +599,7 @@ quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum);
+ strong_checksum, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -620,7 +609,7 @@ out:
int32_t
quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -630,10 +619,10 @@ quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readdir_stub (frame, default_readdir_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -641,7 +630,7 @@ quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -651,7 +640,7 @@ out:
int32_t
quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
{
call_stub_t *stub = NULL;
quiesce_local_t *local = NULL;
@@ -661,10 +650,11 @@ quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_readdirp_stub (frame, default_readdirp_resume,
- local->fd, local->size, local->offset);
+ local->fd, local->size, local->offset,
+ local->dict);
if (!stub) {
STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -672,7 +662,7 @@ quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -685,7 +675,7 @@ out:
int32_t
quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -699,10 +689,11 @@ quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_writev_stub (frame, default_writev_resume,
local->fd, local->vector, local->flag,
- local->offset, local->iobref);
+ local->offset, local->io_flags,
+ local->iobref, xdata);
if (!stub) {
STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -710,7 +701,7 @@ quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -719,7 +710,7 @@ out:
int32_t
quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -733,10 +724,10 @@ quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_xattrop_stub (frame, default_xattrop_resume,
&local->loc, local->xattrop_flags,
- local->dict);
+ local->dict, xdata);
if (!stub) {
STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -744,7 +735,7 @@ quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -753,7 +744,7 @@ out:
int32_t
quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -767,10 +758,10 @@ quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
local->fd, local->xattrop_flags,
- local->dict);
+ local->dict, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -778,7 +769,7 @@ quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -787,7 +778,7 @@ out:
int32_t
quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -800,10 +791,10 @@ quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_lk_stub (frame, default_lk_resume,
- local->fd, local->flag, &local->flock);
+ local->fd, local->flag, &local->flock, xdata);
if (!stub) {
STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM,
- NULL);
+ NULL, NULL);
goto out;
}
@@ -811,7 +802,7 @@ quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -820,7 +811,7 @@ out:
int32_t
quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -834,9 +825,9 @@ quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_inodelk_stub (frame, default_inodelk_resume,
local->volname, &local->loc,
- local->flag, &local->flock);
+ local->flag, &local->flock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -844,7 +835,7 @@ quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -854,7 +845,7 @@ out:
int32_t
quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -868,9 +859,9 @@ quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_finodelk_stub (frame, default_finodelk_resume,
local->volname, local->fd,
- local->flag, &local->flock);
+ local->flag, &local->flock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -878,7 +869,7 @@ quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -887,7 +878,7 @@ out:
int32_t
quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -901,9 +892,9 @@ quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_entrylk_stub (frame, default_entrylk_resume,
local->volname, &local->loc,
- local->name, local->cmd, local->type);
+ local->name, local->cmd, local->type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -911,7 +902,7 @@ quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -920,7 +911,7 @@ out:
int32_t
quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -934,9 +925,9 @@ quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Re-transmit (by putting in the queue) */
stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
local->volname, local->fd,
- local->name, local->cmd, local->type);
+ local->name, local->cmd, local->type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
goto out;
}
@@ -944,7 +935,7 @@ quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -954,7 +945,7 @@ out:
int32_t
quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -967,10 +958,10 @@ quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_setattr_stub (frame, default_setattr_resume,
- &local->loc, &local->stbuf, local->flag);
+ &local->loc, &local->stbuf, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -979,7 +970,7 @@ quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -989,7 +980,7 @@ out:
int32_t
quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1003,10 +994,10 @@ quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) && (op_errno == ENOTCONN)) {
/* Re-transmit (by putting in the queue) */
stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
- local->fd, &local->stbuf, local->flag);
+ local->fd, &local->stbuf, local->flag, xdata);
if (!stub) {
STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM,
- NULL, NULL);
+ NULL, NULL, NULL);
goto out;
}
@@ -1015,7 +1006,7 @@ quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
out:
gf_quiesce_local_wipe (this, local);
@@ -1033,7 +1024,7 @@ int32_t
quiesce_removexattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1046,14 +1037,14 @@ quiesce_removexattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
loc,
- name);
- return 0;
+ name, xdata);
+ return 0;
}
stub = fop_removexattr_stub (frame, default_removexattr_resume,
- loc, name);
+ loc, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1066,7 +1057,7 @@ int32_t
quiesce_truncate (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1079,13 +1070,13 @@ quiesce_truncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
- offset);
- return 0;
+ offset, xdata);
+ return 0;
}
- stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset);
+ stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1099,7 +1090,7 @@ quiesce_fsetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1113,14 +1104,14 @@ quiesce_fsetxattr (call_frame_t *frame,
FIRST_CHILD(this)->fops->fsetxattr,
fd,
dict,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
- fd, dict, flags);
+ fd, dict, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1134,7 +1125,7 @@ quiesce_setxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1148,14 +1139,14 @@ quiesce_setxattr (call_frame_t *frame,
FIRST_CHILD(this)->fops->setxattr,
loc,
dict,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
stub = fop_setxattr_stub (frame, default_setxattr_resume,
- loc, dict, flags);
+ loc, dict, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1167,7 +1158,7 @@ quiesce_setxattr (call_frame_t *frame,
int32_t
quiesce_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1180,15 +1171,15 @@ quiesce_create (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, (flags & ~O_APPEND), mode, fd, params);
- return 0;
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
+ return 0;
}
stub = fop_create_stub (frame, default_create_resume,
- loc, (flags & ~O_APPEND), mode, fd, params);
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
if (!stub) {
STACK_UNWIND_STRICT (create, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1201,7 +1192,7 @@ int32_t
quiesce_link (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1213,14 +1204,14 @@ quiesce_link (call_frame_t *frame,
default_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
- stub = fop_link_stub (frame, default_link_resume, oldloc, newloc);
+ stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (link, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1233,7 +1224,7 @@ int32_t
quiesce_rename (call_frame_t *frame,
xlator_t *this,
loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1245,14 +1236,14 @@ quiesce_rename (call_frame_t *frame,
default_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
- stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc);
+ stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata);
if (!stub) {
STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1264,7 +1255,7 @@ quiesce_rename (call_frame_t *frame,
int
quiesce_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, dict_t *params)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1275,15 +1266,15 @@ quiesce_symlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, params);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
stub = fop_symlink_stub (frame, default_symlink_resume,
- linkpath, loc, params);
+ linkpath, loc, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1294,7 +1285,7 @@ quiesce_symlink (call_frame_t *frame, xlator_t *this,
int
-quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1305,13 +1296,13 @@ quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
STACK_WIND (frame, default_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc, flags);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
- stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags);
+ stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1323,7 +1314,7 @@ quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
int32_t
quiesce_unlink (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1335,13 +1326,13 @@ quiesce_unlink (call_frame_t *frame,
default_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
- stub = fop_unlink_stub (frame, default_unlink_resume, loc);
+ stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1352,7 +1343,7 @@ quiesce_unlink (call_frame_t *frame,
int
quiesce_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1363,15 +1354,15 @@ quiesce_mkdir (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode, params);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
stub = fop_mkdir_stub (frame, default_mkdir_resume,
- loc, mode, params);
+ loc, mode, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1383,7 +1374,7 @@ quiesce_mkdir (call_frame_t *frame, xlator_t *this,
int
quiesce_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, dict_t *parms)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1394,15 +1385,15 @@ quiesce_mknod (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, parms);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
stub = fop_mknod_stub (frame, default_mknod_resume,
- loc, mode, rdev, parms);
+ loc, mode, rdev, umask, xdata);
if (!stub) {
STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1415,7 +1406,7 @@ int32_t
quiesce_ftruncate (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1428,13 +1419,13 @@ quiesce_ftruncate (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
fd,
- offset);
- return 0;
+ offset, xdata);
+ return 0;
}
- stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset);
+ stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1449,7 +1440,7 @@ int32_t
quiesce_readlink (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1468,13 +1459,13 @@ quiesce_readlink (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
loc,
- size);
- return 0;
+ size, xdata);
+ return 0;
}
- stub = fop_readlink_stub (frame, default_readlink_resume, loc, size);
+ stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1488,7 +1479,7 @@ int32_t
quiesce_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1507,13 +1498,13 @@ quiesce_access (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
loc,
- mask);
- return 0;
+ mask, xdata);
+ return 0;
}
- stub = fop_access_stub (frame, default_access_resume, loc, mask);
+ stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1526,7 +1517,7 @@ int32_t
quiesce_fgetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1547,13 +1538,13 @@ quiesce_fgetxattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fgetxattr,
fd,
- name);
+ name, xdata);
return 0;
}
- stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name);
+ stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1565,7 +1556,7 @@ quiesce_fgetxattr (call_frame_t *frame,
int32_t
quiesce_statfs (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1582,13 +1573,13 @@ quiesce_statfs (call_frame_t *frame,
quiesce_statfs_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
- stub = fop_statfs_stub (frame, default_statfs_resume, loc);
+ stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1601,7 +1592,7 @@ int32_t
quiesce_fsyncdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1620,13 +1611,13 @@ quiesce_fsyncdir (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
fd,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
- stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags);
+ stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1638,7 +1629,7 @@ quiesce_fsyncdir (call_frame_t *frame,
int32_t
quiesce_opendir (call_frame_t *frame,
xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1656,13 +1647,13 @@ quiesce_opendir (call_frame_t *frame,
quiesce_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
- stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd);
+ stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1674,7 +1665,7 @@ quiesce_opendir (call_frame_t *frame,
int32_t
quiesce_fstat (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1691,13 +1682,13 @@ quiesce_fstat (call_frame_t *frame,
quiesce_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
- stub = fop_fstat_stub (frame, default_fstat_resume, fd);
+ stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1710,7 +1701,7 @@ int32_t
quiesce_fsync (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1729,13 +1720,13 @@ quiesce_fsync (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
fd,
- flags);
- return 0;
+ flags, xdata);
+ return 0;
}
- stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags);
+ stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1747,7 +1738,7 @@ quiesce_fsync (call_frame_t *frame,
int32_t
quiesce_flush (call_frame_t *frame,
xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1764,13 +1755,13 @@ quiesce_flush (call_frame_t *frame,
quiesce_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
- stub = fop_flush_stub (frame, default_flush_resume, fd);
+ stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -1785,8 +1776,8 @@ quiesce_writev (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref)
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1801,15 +1792,15 @@ quiesce_writev (call_frame_t *frame,
fd,
vector,
count,
- off,
- iobref);
- return 0;
+ off, flags,
+ iobref, xdata);
+ return 0;
}
stub = fop_writev_stub (frame, default_writev_resume,
- fd, vector, count, off, iobref);
+ fd, vector, count, off, flags, iobref, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -1823,7 +1814,7 @@ quiesce_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1836,6 +1827,7 @@ quiesce_readv (call_frame_t *frame,
local->fd = fd_ref (fd);
local->size = size;
local->offset = offset;
+ local->io_flag = flags;
frame->local = local;
STACK_WIND (frame,
@@ -1844,14 +1836,15 @@ quiesce_readv (call_frame_t *frame,
FIRST_CHILD(this)->fops->readv,
fd,
size,
- offset);
- return 0;
+ offset, flags, xdata);
+ return 0;
}
- stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset);
+ stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset,
+ flags, xdata);
if (!stub) {
STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL);
+ NULL, 0, NULL, NULL, NULL);
return 0;
}
@@ -1866,7 +1859,7 @@ quiesce_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
int32_t flags, fd_t *fd,
- int32_t wbflags)
+ dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1882,21 +1875,20 @@ quiesce_open (call_frame_t *frame,
/* Don't send O_APPEND below, as write() re-transmittions can
fail with O_APPEND */
local->flag = (flags & ~O_APPEND);
- local->wbflags = wbflags;
frame->local = local;
STACK_WIND (frame,
quiesce_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, (flags & ~O_APPEND), fd, wbflags);
- return 0;
+ loc, (flags & ~O_APPEND), fd, xdata);
+ return 0;
}
stub = fop_open_stub (frame, default_open_resume, loc,
- (flags & ~O_APPEND), fd, wbflags);
+ (flags & ~O_APPEND), fd, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1909,7 +1901,7 @@ int32_t
quiesce_getxattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1930,13 +1922,13 @@ quiesce_getxattr (call_frame_t *frame,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
loc,
- name);
- return 0;
+ name, xdata);
+ return 0;
}
- stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name);
+ stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1951,7 +1943,7 @@ quiesce_xattrop (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -1965,14 +1957,14 @@ quiesce_xattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->xattrop,
loc,
flags,
- dict);
- return 0;
+ dict, xdata);
+ return 0;
}
stub = fop_xattrop_stub (frame, default_xattrop_resume,
- loc, flags, dict);
+ loc, flags, dict, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -1986,7 +1978,7 @@ quiesce_fxattrop (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
gf_xattrop_flags_t flags,
- dict_t *dict)
+ dict_t *dict, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2000,14 +1992,14 @@ quiesce_fxattrop (call_frame_t *frame,
FIRST_CHILD(this)->fops->fxattrop,
fd,
flags,
- dict);
- return 0;
+ dict, xdata);
+ return 0;
}
stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
- fd, flags, dict);
+ fd, flags, dict, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2021,7 +2013,7 @@ quiesce_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2035,13 +2027,13 @@ quiesce_lk (call_frame_t *frame,
FIRST_CHILD(this)->fops->lk,
fd,
cmd,
- lock);
- return 0;
+ lock, xdata);
+ return 0;
}
- stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock);
+ stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2054,7 +2046,7 @@ quiesce_lk (call_frame_t *frame,
int32_t
quiesce_inodelk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2066,14 +2058,14 @@ quiesce_inodelk (call_frame_t *frame, xlator_t *this,
default_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
stub = fop_inodelk_stub (frame, default_inodelk_resume,
- volume, loc, cmd, lock);
+ volume, loc, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2084,7 +2076,7 @@ quiesce_inodelk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2096,14 +2088,14 @@ quiesce_finodelk (call_frame_t *frame, xlator_t *this,
default_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
stub = fop_finodelk_stub (frame, default_finodelk_resume,
- volume, fd, cmd, lock);
+ volume, fd, cmd, lock, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2115,7 +2107,7 @@ quiesce_finodelk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2126,14 +2118,14 @@ quiesce_entrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
stub = fop_entrylk_stub (frame, default_entrylk_resume,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2145,7 +2137,7 @@ quiesce_entrylk (call_frame_t *frame, xlator_t *this,
int32_t
quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2156,14 +2148,14 @@ quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, default_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
- volume, fd, basename, cmd, type);
+ volume, fd, basename, cmd, type, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
return 0;
}
@@ -2176,7 +2168,7 @@ int32_t
quiesce_rchecksum (call_frame_t *frame,
xlator_t *this,
fd_t *fd, off_t offset,
- int32_t len)
+ int32_t len, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2195,14 +2187,14 @@ quiesce_rchecksum (call_frame_t *frame,
quiesce_rchecksum_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len);
- return 0;
+ fd, offset, len, xdata);
+ return 0;
}
stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- fd, offset, len);
+ fd, offset, len, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL);
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
return 0;
}
@@ -2217,7 +2209,7 @@ quiesce_readdir (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2236,13 +2228,13 @@ quiesce_readdir (call_frame_t *frame,
quiesce_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
- return 0;
+ fd, size, off, xdata);
+ return 0;
}
- stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off);
+ stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2257,7 +2249,7 @@ quiesce_readdirp (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t off)
+ off_t off, dict_t *dict)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2270,19 +2262,21 @@ quiesce_readdirp (call_frame_t *frame,
local->fd = fd_ref (fd);
local->size = size;
local->offset = off;
+ local->dict = dict_ref (dict);
frame->local = local;
STACK_WIND (frame,
quiesce_readdirp_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp,
- fd, size, off);
- return 0;
+ fd, size, off, dict);
+ return 0;
}
- stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size, off);
+ stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size,
+ off, dict);
if (!stub) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2296,7 +2290,7 @@ quiesce_setattr (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2308,14 +2302,14 @@ quiesce_setattr (call_frame_t *frame,
default_setattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid);
- return 0;
+ loc, stbuf, valid, xdata);
+ return 0;
}
stub = fop_setattr_stub (frame, default_setattr_resume,
- loc, stbuf, valid);
+ loc, stbuf, valid, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2328,7 +2322,7 @@ quiesce_setattr (call_frame_t *frame,
int32_t
quiesce_stat (call_frame_t *frame,
xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2345,13 +2339,13 @@ quiesce_stat (call_frame_t *frame,
quiesce_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ loc, xdata);
+ return 0;
}
- stub = fop_stat_stub (frame, default_stat_resume, loc);
+ stub = fop_stat_stub (frame, default_stat_resume, loc, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
@@ -2382,9 +2376,8 @@ quiesce_lookup (call_frame_t *frame,
quiesce_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+ loc, xattr_req);
+ return 0;
}
stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req);
@@ -2404,7 +2397,7 @@ quiesce_fsetattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iatt *stbuf,
- int32_t valid)
+ int32_t valid, dict_t *xdata)
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
@@ -2416,14 +2409,14 @@ quiesce_fsetattr (call_frame_t *frame,
default_fsetattr_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
+ fd, stbuf, valid, xdata);
+ return 0;
}
stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
- fd, stbuf, valid);
+ fd, stbuf, valid, xdata);
if (!stub) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -2499,7 +2492,7 @@ notify (xlator_t *this, int event, void *data, ...)
{
int ret = 0;
quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ struct timespec timeout = {0,};
priv = this->private;
if (!priv)
@@ -2532,7 +2525,7 @@ notify (xlator_t *this, int event, void *data, ...)
if (priv->timer)
break;
timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
priv->timer = gf_timer_call_after (this->ctx,
timeout,
@@ -2606,12 +2599,10 @@ struct xlator_fops fops = {
};
-struct xlator_dumpops dumpops = {
-};
+struct xlator_dumpops dumpops;
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index 32b1935d9..878ed77e9 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_H__
#define __QUIESCE_H__
@@ -25,7 +15,7 @@
#include "xlator.h"
#include "timer.h"
-#define GF_FOPS_EXPECTED_IN_PARALLEL 4096
+#define GF_FOPS_EXPECTED_IN_PARALLEL 512
typedef struct {
gf_timer_t *timer;
@@ -55,6 +45,7 @@ typedef struct {
entrylk_type type;
gf_xattrop_flags_t xattrop_flags;
int32_t wbflags;
+ uint32_t io_flag;
} quiesce_local_t;
#endif
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 4baa5f06e..7165adc59 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -1,16 +1,22 @@
-xlator_LTLIBRARIES = quota.la
+xlator_LTLIBRARIES = quota.la quotad.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quota_la_LDFLAGS = -module -avoidversion
+quota_la_LDFLAGS = -module -avoid-version
+quotad_la_LDFLAGS = -module -avoid-version
-quota_la_SOURCES = quota.c
+quota_la_SOURCES = quota.c quota-enforcer-client.c
quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quota-mem-types.h quota.h
+quotad_la_SOURCES = quotad.c quotad-helpers.c quotad-aggregator.c
+quotad_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/cluster/dht/src
+noinst_HEADERS = quota-mem-types.h quota.h quotad-aggregator.h quotad-helpers.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/cluster/dht/src -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
new file mode 100644
index 000000000..7d8ab937d
--- /dev/null
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -0,0 +1,403 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/file.h>
+#include <netdb.h>
+#include <signal.h>
+#include <libgen.h>
+
+#include <sys/utsname.h>
+
+#include <stdint.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <semaphore.h>
+#include <errno.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#ifdef HAVE_MALLOC_STATS
+#ifdef DEBUG
+#include <mcheck.h>
+#endif
+#endif
+
+#include "quota.h"
+
+extern struct rpc_clnt_program quota_enforcer_clnt;
+
+int32_t
+quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+int
+quota_enforcer_submit_request (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+ quota_priv_t *priv = NULL;
+
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (priv->rpc_clnt, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+}
+
+int
+quota_enforcer_lookup_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ quota_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ gfs3_lookup_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt postparent = {0,};
+ int op_errno = EINVAL;
+ dict_t *xdata = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ inode = local->validate_loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+
+ if (rsp.op_ret == -1)
+ goto out;
+
+ rsp.op_ret = -1;
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret,
+ op_errno, out);
+
+ if ((!uuid_is_null (inode->gfid))
+ && (uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "gfid changed for %s", local->validate_loc.path);
+ rsp.op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ rsp.op_ret = 0;
+
+out:
+ rsp.op_errno = op_errno;
+ if (rsp.op_ret == -1) {
+ /* any error other than ENOENT */
+ if (rsp.op_errno != ENOENT)
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s (%s)",
+ strerror (rsp.op_errno),
+ local->validate_loc.path,
+ loc_gfid_utoa (&local->validate_loc));
+ else
+ gf_log (this->name, GF_LOG_TRACE,
+ "not found on remote node");
+
+ }
+
+ local->validate_cbk (frame, NULL, this, rsp.op_ret, rsp.op_errno, inode,
+ &stbuf, xdata, &postparent);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata, fop_lookup_cbk_t validate_cbk)
+{
+ quota_local_t *local = NULL;
+ gfs3_lookup_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ quota_priv_t *priv = NULL;
+
+ if (!frame || !this || !loc)
+ goto unwind;
+
+ local = frame->local;
+ local->validate_cbk = validate_cbk;
+
+ priv = this->private;
+
+ if (!(loc && loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (loc->inode->gfid))
+ memcpy (req.gfid, loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, loc->gfid, 16);
+
+ if (xdata) {
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata,
+ (&req.xdata.xdata_val),
+ req.xdata.xdata_len,
+ op_errno, unwind);
+ }
+
+ if (loc->name)
+ req.bname = (char *)loc->name;
+ else
+ req.bname = "";
+
+ ret = quota_enforcer_submit_request (&req, frame,
+ priv->quota_enforcer,
+ GF_AGGREGATOR_LOOKUP,
+ NULL, this,
+ quota_enforcer_lookup_cbk,
+ (xdrproc_t)xdr_gfs3_lookup_req);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ validate_cbk (frame, NULL, this, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+quota_enforcer_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ this = mydata;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ break;
+ }
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+int
+quota_enforcer_blocking_connect (rpc_clnt_t *rpc)
+{
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = dict_new ();
+ if (options == NULL)
+ goto out;
+
+ ret = dict_set_str (options, "non-blocking-io", "no");
+ if (ret)
+ goto out;
+
+ rpc->conn.trans->reconfigure (rpc->conn.trans, options);
+
+ rpc_clnt_start (rpc);
+
+ ret = dict_set_str (options, "non-blocking-io", "yes");
+ if (ret)
+ goto out;
+
+ rpc->conn.trans->reconfigure (rpc->conn.trans, options);
+
+ ret = 0;
+out:
+ dict_unref (options);
+
+ return ret;
+}
+
+//Returns a started rpc_clnt. Creates a new rpc_clnt if quota_priv doesn't have
+//one already
+struct rpc_clnt *
+quota_enforcer_init (xlator_t *this, dict_t *options)
+{
+ struct rpc_clnt *rpc = NULL;
+ quota_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ if (priv->rpc_clnt) {
+ gf_log (this->name, GF_LOG_TRACE, "quota enforcer clnt already "
+ "inited");
+ //Turns out to be a NOP if the clnt is already connected.
+ ret = quota_enforcer_blocking_connect (priv->rpc_clnt);
+ if (ret)
+ goto out;
+
+ return priv->rpc_clnt;
+ }
+
+ priv->quota_enforcer = &quota_enforcer_clnt;
+
+ ret = dict_set_str (options, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "transport.socket.connect-path",
+ "/tmp/quotad.socket");
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
+ if (!rpc) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (rpc, quota_enforcer_notify, this);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
+
+ ret = quota_enforcer_blocking_connect (rpc);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref (rpc);
+ rpc = NULL;
+ }
+
+ return rpc;
+}
+
+struct rpc_clnt_procedure quota_enforcer_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
+};
+
+struct rpc_clnt_program quota_enforcer_clnt = {
+ .progname = "Quota enforcer",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numproc = GF_AGGREGATOR_MAXVALUE,
+ .proctable = quota_enforcer_actors,
+};
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index da28be5b3..97d916568 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -1,30 +1,19 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
#include "mem-types.h"
enum gf_quota_mem_types_ {
- gf_quota_mt_quota_local_t = gf_common_mt_end + 1,
- gf_quota_mt_quota_priv_t,
+ gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
gf_quota_mt_quota_inode_ctx_t,
gf_quota_mt_loc_t,
gf_quota_mt_char,
@@ -32,6 +21,9 @@ enum gf_quota_mem_types_ {
gf_quota_mt_int32_t,
gf_quota_mt_limits_t,
gf_quota_mt_quota_dentry_t,
+ gf_quota_mt_quota_limits_level_t,
+ gf_quota_mt_qd_vols_conf_t,
+ gf_quota_mt_aggregator_state_t,
gf_quota_mt_end
};
#endif
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 3b219c643..4beaae341 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -1,63 +1,111 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <fnmatch.h>
#include "quota.h"
#include "common-utils.h"
#include "defaults.h"
+#include "statedump.h"
int32_t
quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
char *name, uuid_t par);
+
+int
+quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
+ loc_t *loc, struct iatt *buf, int32_t *op_errno);
+
struct volume_options options[];
+static int32_t
+__quota_init_inode_ctx (inode_t *inode, xlator_t *this,
+ quota_inode_ctx_t **context)
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (inode == NULL) {
+ goto out;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
+
+ LOCK_INIT(&ctx->lock);
+
+ if (context != NULL) {
+ *context = ctx;
+ }
+
+ INIT_LIST_HEAD (&ctx->parents);
+
+ ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot set quota context in inode (gfid:%s)",
+ uuid_utoa (inode->gfid));
+ }
+out:
+ return ret;
+}
+
+
+static int32_t
+quota_inode_ctx_get (inode_t *inode, xlator_t *this,
+ quota_inode_ctx_t **ctx, char create_if_absent)
+{
+ int32_t ret = 0;
+ uint64_t ctx_int;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx_int);
+
+ if ((ret == 0) && (ctx != NULL)) {
+ *ctx = (quota_inode_ctx_t *) (unsigned long)ctx_int;
+ } else if (create_if_absent) {
+ ret = __quota_init_inode_ctx (inode, this, ctx);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
int
quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
{
int ret = -1;
- if (!loc) {
+ if (!loc || (inode == NULL))
return ret;
- }
if (inode) {
loc->inode = inode_ref (inode);
+ uuid_copy (loc->gfid, inode->gfid);
}
if (parent) {
loc->parent = inode_ref (parent);
}
- loc->path = gf_strdup (path);
- if (!loc->path) {
- goto loc_wipe;
- }
+ if (path != NULL) {
+ loc->path = gf_strdup (path);
- loc->name = strrchr (loc->path, '/');
- if (loc->name) {
- loc->name++;
- } else {
- goto loc_wipe;
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name) {
+ loc->name++;
+ }
}
ret = 0;
-loc_wipe:
if (ret < 0) {
loc_wipe (loc);
}
@@ -87,19 +135,17 @@ quota_inode_loc_fill (inode_t *inode, loc_t *loc)
parent = inode_parent (inode, 0, NULL);
if (!parent) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"cannot find parent for inode (gfid:%s)",
uuid_utoa (inode->gfid));
- goto err;
}
ignore_parent:
ret = inode_path (inode, NULL, &resolvedpath);
if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"cannot construct path for inode (gfid:%s)",
uuid_utoa (inode->gfid));
- goto err;
}
ret = quota_loc_fill (loc, inode, parent, resolvedpath);
@@ -134,19 +180,24 @@ quota_local_cleanup (xlator_t *this, quota_local_t *local)
inode_unref (local->inode);
LOCK_DESTROY (&local->lock);
+ mem_put (local);
out:
return 0;
}
-quota_local_t *
+static inline quota_local_t *
quota_local_new ()
{
- quota_local_t *local = NULL;
- GF_UNUSED int32_t ret = 0;
+ quota_local_t *local = NULL;
+ local = mem_get0 (THIS->local_pool);
+ if (local == NULL)
+ goto out;
- QUOTA_LOCAL_ALLOC_OR_GOTO (local, quota_local_t, err);
-err:
+ LOCK_INIT (&local->lock);
+ local->space_available = -1;
+
+out:
return local;
}
@@ -164,12 +215,15 @@ __quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par)
dentry->name = gf_strdup (name);
if (dentry->name == NULL) {
GF_FREE (dentry);
+ dentry = NULL;
goto err;
}
uuid_copy (dentry->par, par);
- list_add_tail (&dentry->next, &ctx->parents);
+ if (ctx != NULL)
+ list_add_tail (&dentry->next, &ctx->parents);
+
err:
return dentry;
}
@@ -190,18 +244,64 @@ out:
return;
}
+inline void
+quota_link_count_decrement (quota_local_t *local)
+{
+ call_stub_t *stub = NULL;
+ int link_count = -1;
+
+ if (local == NULL)
+ goto out;
+
+ LOCK (&local->lock);
+ {
+ link_count = --local->link_count;
+ if (link_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+out:
+ return;
+}
+
+inline void
+quota_handle_validate_error (quota_local_t *local, int32_t op_ret,
+ int32_t op_errno)
+{
+ if (local == NULL)
+ goto out;
+
+ LOCK (&local->lock);
+ {
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ /* we abort checking limits on this path to root */
+ quota_link_count_decrement (local);
+out:
+ return;
+}
int32_t
quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- quota_local_t *local = NULL;
- uint32_t validate_count = 0, link_count = 0;
- int32_t ret = 0;
- quota_inode_ctx_t *ctx = NULL;
- int64_t *size = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int64_t *size = 0;
+ uint64_t value = 0;
local = frame->local;
@@ -213,7 +313,7 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
GF_ASSERT (frame);
GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno,
EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, dict, unwind, op_errno,
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, unwind, op_errno,
EINVAL);
ret = inode_ctx_get (local->validate_loc.inode, this, &value);
@@ -227,7 +327,7 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ ret = dict_get_bin (xdata, QUOTA_SIZE_KEY, (void **) &size);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"size key not present in dict");
@@ -246,29 +346,11 @@ quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
UNLOCK (&ctx->lock);
- quota_check_limit (frame, local->validate_loc.inode, this, NULL, 0);
+ quota_check_limit (frame, local->validate_loc.inode, this, NULL, NULL);
return 0;
unwind:
- LOCK (&local->lock);
- {
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- validate_count = --local->validate_count;
- link_count = local->link_count;
-
- if ((validate_count == 0) && (link_count == 0)) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
-
- if (stub != NULL) {
- call_resume (stub);
- }
-
+ quota_handle_validate_error (local, op_ret, op_errno);
return 0;
}
@@ -295,340 +377,641 @@ quota_timeout (struct timeval *tv, int32_t timeout)
return timed_out;
}
+inline void
+quota_add_parent (quota_dentry_t *dentry, struct list_head *list)
+{
+ quota_dentry_t *entry = NULL;
+ gf_boolean_t found = _gf_false;
+
+ if ((dentry == NULL) || (list == NULL)) {
+ goto out;
+ }
+
+ list_for_each_entry (entry, list, next) {
+ if (uuid_compare (dentry->par, entry->par) == 0) {
+ found = _gf_true;
+ goto out;
+ }
+ }
+
+ list_add_tail (&dentry->next, list);
+
+out:
+ return;
+}
int32_t
-quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, uuid_t par)
+quota_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- int32_t ret = -1;
- inode_t *_inode = NULL, *parent = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
- quota_local_t *local = NULL;
- char need_validate = 0, need_unwind = 0;
- int64_t delta = 0;
- call_stub_t *stub = NULL;
- int32_t validate_count = 0, link_count = 0;
- uint64_t value = 0;
- char just_validated = 0;
- uuid_t trav_uuid = {0,};
-
- GF_VALIDATE_OR_GOTO ("quota", this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ inode_t *parent = NULL, *tmp_parent = NULL;
+ gf_dirent_t *entry = NULL;
+ loc_t loc = {0, };
+ quota_dentry_t *dentry = NULL, *tmp = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ struct list_head parents = {0, };
+ quota_local_t *local = NULL;
+
+ INIT_LIST_HEAD (&parents);
local = frame->local;
- GF_VALIDATE_OR_GOTO (this->name, local, out);
+ frame->local = NULL;
- delta = local->delta;
+ if (op_ret < 0)
+ goto err;
- GF_VALIDATE_OR_GOTO (this->name, local->stub, out);
+ parent = inode_parent (local->loc.inode, 0, NULL);
+ if (parent == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "parent is NULL");
+ op_errno = EINVAL;
+ goto err;
+ }
- priv = this->private;
+ if ((op_ret > 0) && (entries != NULL)) {
+ list_for_each_entry (entry, &entries->list, list) {
+ if (__is_root_gfid (entry->inode->gfid)) {
+ /* The list contains a sub-list for each
+ * possible path to the target inode. Each
+ * sub-list starts with the root entry of the
+ * tree and is followed by the child entries
+ * for a particular path to the target entry.
+ * The root entry is an implied sub-list
+ * delimiter, as it denotes we have started
+ * processing a new path. Reset the parent
+ * pointer and continue
+ */
+
+ tmp_parent = NULL;
+ }
- inode_ctx_get (inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
- _inode = inode_ref (inode);
+ loc.inode = inode_ref (entry->inode);
+ loc.parent = inode_ref (tmp_parent);
+ loc.name = entry->d_name;
- LOCK (&local->lock);
- {
- just_validated = local->just_validated;
- local->just_validated = 0;
+ quota_fill_inodectx (this, entry->inode, entry->dict,
+ &loc, &entry->d_stat, &op_errno);
- if (just_validated) {
- local->validate_count--;
+ tmp_parent = entry->inode;
+
+ loc_wipe (&loc);
}
}
- UNLOCK (&local->lock);
- uuid_copy (trav_uuid, par);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
- do {
- if (ctx != NULL) {
- LOCK (&ctx->lock);
- {
- if (ctx->limit >= 0) {
- if (!just_validated
- && quota_timeout (&ctx->tv,
- priv->timeout)) {
- need_validate = 1;
- } else if ((ctx->size + delta)
- >= ctx->limit) {
- local->op_ret = -1;
- local->op_errno = EDQUOT;
- need_unwind = 1;
- }
- }
- }
- UNLOCK (&ctx->lock);
+ if (ctx != NULL) {
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ /* we built ancestry for a non-directory */
+ tmp = __quota_dentry_new (NULL, dentry->name,
+ dentry->par);
+ quota_add_parent (tmp, &parents);
- if (need_validate) {
- goto validate;
+ if (list_empty (&tmp->next)) {
+ __quota_dentry_free (tmp);
+ tmp = NULL;
+ }
}
+ }
+ UNLOCK (&ctx->lock);
+ }
- if (need_unwind) {
+ if (list_empty (&parents)) {
+ /* we built ancestry for a directory */
+ list_for_each_entry (entry, &entries->list, list) {
+ if (entry->inode == local->loc.inode)
break;
- }
}
- if (__is_root_gfid (_inode->gfid)) {
- break;
- }
+ GF_ASSERT (&entry->list != &entries->list);
- parent = inode_parent (_inode, trav_uuid, name);
+ tmp = __quota_dentry_new (NULL, entry->d_name, parent->gfid);
+ quota_add_parent (tmp, &parents);
+ }
- if (name != NULL) {
- name = NULL;
- uuid_clear (trav_uuid);
- }
+ local->ancestry_cbk (&parents, local->loc.inode, 0, 0,
+ local->ancestry_data);
+ goto cleanup;
- if (parent == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot find parent for inode (gfid:%s), hence "
- "aborting enforcing quota-limits and continuing"
- " with the fop", uuid_utoa (_inode->gfid));
- }
+err:
+ local->ancestry_cbk (NULL, NULL, -1, op_errno, local->ancestry_data);
- inode_unref (_inode);
- _inode = parent;
- just_validated = 0;
+cleanup:
+ STACK_DESTROY (frame->root);
+ quota_local_cleanup (this, local);
- if (_inode == NULL) {
- break;
- }
+ if (parent != NULL) {
+ inode_unref (parent);
+ parent = NULL;
+ }
- value = 0;
- inode_ctx_get (_inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- } while (1);
+ list_for_each_entry_safe (dentry, tmp, &parents, next) {
+ __quota_dentry_free (dentry);
+ }
- ret = 0;
+ return 0;
+}
- if (_inode != NULL) {
- inode_unref (_inode);
+int32_t
+quota_build_ancestry_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ dict_t *xdata_req = NULL;
+ quota_local_t *local = NULL;
+
+ if (op_ret < 0) {
+ goto err;
}
- LOCK (&local->lock);
- {
- validate_count = local->validate_count;
- link_count = local->link_count;
- if ((validate_count == 0) && (link_count == 0)) {
- stub = local->stub;
- local->stub = NULL;
- }
+ xdata_req = dict_new ();
+ if (xdata_req == NULL) {
+ op_ret = -ENOMEM;
+ goto err;
}
- UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
+ op_ret = dict_set_int8 (xdata_req, QUOTA_LIMIT_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
}
-out:
- return ret;
+ op_ret = dict_set_int8 (xdata_req, GET_ANCESTRY_DENTRY_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
+
+ /* This would ask posix layer to construct dentry chain till root */
+ STACK_WIND (frame, quota_build_ancestry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, 0, 0, xdata_req);
+
+ op_ret = 0;
+
+err:
+ fd_unref (fd);
+
+ dict_unref (xdata_req);
+
+ if (op_ret < 0) {
+ local = frame->local;
+ frame->local = NULL;
+
+ local->ancestry_cbk (NULL, NULL, -1, op_errno,
+ local->ancestry_data);
+ quota_local_cleanup (this, local);
+ STACK_DESTROY (frame->root);
+ }
+
+ return 0;
+}
+
+int
+quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data)
+{
+ loc_t loc = {0, };
+ fd_t *fd = NULL;
+ quota_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ int op_errno = EINVAL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+
+ fd = fd_create (inode, 0);
+
+ new_frame = create_frame (this, this->ctx->pool);
+ if (new_frame == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ new_frame->root->uid = new_frame->root->gid = 0;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ new_frame->local = local;
+ local->ancestry_cbk = ancestry_cbk;
+ local->ancestry_data = data;
+ local->loc.inode = inode_ref (inode);
+
+ if (IA_ISDIR (inode->ia_type)) {
+ STACK_WIND (new_frame, quota_build_ancestry_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, &loc, fd,
+ NULL);
+ } else {
+ STACK_WIND (new_frame, quota_build_ancestry_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &loc, 0, fd,
+ NULL);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+
+err:
+ ancestry_cbk (NULL, NULL, -1, op_errno, data);
+
+ fd_unref (fd);
+
+ local = new_frame->local;
+ new_frame->local = NULL;
+
+ if (local != NULL) {
+ quota_local_cleanup (this, local);
+ }
+
+ if (new_frame != NULL) {
+ STACK_DESTROY (new_frame->root);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+int
+quota_validate (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ fop_lookup_cbk_t cbk_fn)
+{
+ quota_local_t *local = NULL;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ quota_priv_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
-validate:
LOCK (&local->lock);
{
loc_wipe (&local->validate_loc);
- if (just_validated) {
- local->validate_count--;
- }
-
- local->validate_count++;
- ret = quota_inode_loc_fill (_inode, &local->validate_loc);
+ ret = quota_inode_loc_fill (inode, &local->validate_loc);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"cannot fill loc for inode (gfid:%s), hence "
"aborting quota-checks and continuing with fop",
- uuid_utoa (_inode->gfid));
- local->validate_count--;
+ uuid_utoa (inode->gfid));
}
}
UNLOCK (&local->lock);
if (ret < 0) {
- goto loc_fill_failed;
+ ret = -ENOMEM;
+ goto err;
}
- STACK_WIND (frame, quota_validate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, &local->validate_loc,
- QUOTA_SIZE_KEY);
+ xdata = dict_new ();
+ if (xdata == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
-loc_fill_failed:
- inode_unref (_inode);
- return 0;
-}
+ ret = dict_set_int8 (xdata, QUOTA_SIZE_KEY, 1);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+ ret = dict_set_str (xdata, "volume-uuid", priv->volume_uuid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
-int32_t
-quota_get_limit_value (inode_t *inode, xlator_t *this, int64_t *n)
+ ret = quota_enforcer_lookup (frame, this, &local->validate_loc, xdata,
+ cbk_fn);
+ if (ret < 0) {
+ ret = -ENOTCONN;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+void
+quota_check_limit_continuation (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
{
- int32_t ret = 0;
- char *path = NULL;
- limits_t *limit_node = NULL;
- quota_priv_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ quota_local_t *local = NULL;
+ quota_dentry_t *entry = NULL;
+ inode_t *parent = NULL;
+ int parent_count = 0;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
- if (inode == NULL || n == NULL) {
- ret = -1;
+ if ((op_ret < 0) || list_empty (parents)) {
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa (inode->gfid));
+ op_errno = EIO;
+ }
+
+ quota_handle_validate_error (local, -1, op_errno);
goto out;
}
- *n = 0;
+ list_for_each_entry (entry, parents, next) {
+ parent_count++;
+ }
- ret = inode_path (inode, NULL, &path);
- if (ret < 0) {
- ret = -1;
- goto out;
+ LOCK (&local->lock);
+ {
+ local->link_count += (parent_count - 1);
}
+ UNLOCK (&local->lock);
- priv = this->private;
+ list_for_each_entry (entry, parents, next) {
+ parent = inode_find (inode->table, entry->par);
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- if (strcmp (limit_node->path, path) == 0) {
- *n = limit_node->value;
- break;
- }
+ quota_check_limit (frame, parent, this, NULL, NULL);
}
out:
- return ret;
+ return;
}
-
-static int32_t
-__quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this,
- dict_t *dict, struct iatt *buf,
- quota_inode_ctx_t **context)
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ char *name, uuid_t par)
{
- int32_t ret = -1;
- int64_t *size = 0;
- quota_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1, op_errno = EINVAL;
+ inode_t *_inode = NULL, *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ char need_validate = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t delta = 0, wouldbe_size = 0;
+ int64_t space_available = 0;
+ uint64_t value = 0;
+ char just_validated = 0;
+ uuid_t trav_uuid = {0,};
+ uint32_t timeout = 0;
+
+ GF_VALIDATE_OR_GOTO ("quota", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, inode, err);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
- if (inode == NULL) {
- goto out;
+ delta = local->delta;
+
+ GF_VALIDATE_OR_GOTO (this->name, local->stub, err);
+ /* Allow all the trusted clients
+ * Don't block the gluster internal processes like rebalance, gsyncd,
+ * self heal etc from the disk quotas.
+ *
+ * Method: Allow all the clients with PID negative. This is by the
+ * assumption that any kernel assigned pid doesn't have the negative
+ * number.
+ */
+ if (0 > frame->root->pid) {
+ ret = 0;
+ quota_link_count_decrement (local);
+ goto done;
}
- QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
+ priv = this->private;
- ctx->limit = limit;
- if (buf)
- ctx->buf = *buf;
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
- LOCK_INIT(&ctx->lock);
+ _inode = inode_ref (inode);
- if (context != NULL) {
- *context = ctx;
+ LOCK (&local->lock);
+ {
+ just_validated = local->just_validated;
+ local->just_validated = 0;
}
+ UNLOCK (&local->lock);
- INIT_LIST_HEAD (&ctx->parents);
+ if ( par != NULL ) {
+ uuid_copy (trav_uuid, par);
+ }
- if (dict != NULL) {
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret == 0) {
- ctx->size = ntoh64 (*size);
- gettimeofday (&ctx->tv, NULL);
+ do {
+ if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) {
+ wouldbe_size = ctx->size + delta;
+
+ LOCK (&ctx->lock);
+ {
+ timeout = priv->soft_timeout;
+
+ if ((ctx->soft_lim >= 0)
+ && (wouldbe_size > ctx->soft_lim)) {
+ timeout = priv->hard_timeout;
+ }
+
+ if (!just_validated
+ && quota_timeout (&ctx->tv, timeout)) {
+ need_validate = 1;
+ } else if (wouldbe_size >= ctx->hard_lim) {
+ hard_limit_exceeded = 1;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ if (need_validate) {
+ ret = quota_validate (frame, _inode, this,
+ quota_validate_cbk);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ break;
+ }
+
+ if (hard_limit_exceeded) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
+
+ space_available = ctx->hard_lim - ctx->size;
+
+ if (space_available < 0)
+ space_available = 0;
+
+ if ((local->space_available < 0)
+ || (local->space_available
+ > space_available)){
+ local->space_available
+ = space_available;
+
+ }
+
+ if (space_available == 0) {
+ op_errno = EDQUOT;
+ goto err;
+ }
+ }
+
+ /* We log usage only if quota limit is configured on
+ that inode. */
+ quota_log_usage (this, ctx, _inode, delta);
}
- }
- ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot set quota context in inode (gfid:%s)",
- uuid_utoa (inode->gfid));
- }
-out:
- return ret;
-}
+ if (__is_root_gfid (_inode->gfid)) {
+ quota_link_count_decrement (local);
+ break;
+ }
+ parent = inode_parent (_inode, trav_uuid, name);
-static int32_t
-quota_inode_ctx_get (inode_t *inode, int64_t limit, xlator_t *this,
- dict_t *dict, struct iatt *buf, quota_inode_ctx_t **ctx,
- char create_if_absent)
-{
- int32_t ret = 0;
- uint64_t ctx_int;
+ if (name != NULL) {
+ name = NULL;
+ uuid_clear (trav_uuid);
+ }
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_int);
+ if (parent == NULL) {
+ ret = quota_build_ancestry (_inode,
+ quota_check_limit_continuation,
+ frame);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto err;
+ }
- if ((ret == 0) && (ctx != NULL)) {
- *ctx = (quota_inode_ctx_t *) (unsigned long)ctx_int;
- } else if (create_if_absent) {
- ret = __quota_init_inode_ctx (inode, limit, this, dict,
- buf, ctx);
+ break;
+ }
+
+ inode_unref (_inode);
+ _inode = parent;
+ just_validated = 0;
+
+ if (_inode == NULL) {
+ break;
}
+
+ value = 0;
+ inode_ctx_get (_inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ } while (1);
+
+ if (_inode != NULL) {
+ inode_unref (_inode);
+ _inode = NULL;
}
- UNLOCK (&inode->lock);
- return ret;
+done:
+ return 0;
+
+err:
+ quota_handle_validate_error (local, -1, op_errno);
+
+ inode_unref (_inode);
+ return 0;
}
+inline int
+quota_get_limits (xlator_t *this, dict_t *dict, int64_t *hard_lim,
+ int64_t *soft_lim)
+{
+ quota_limit_t *limit = NULL;
+ quota_priv_t *priv = NULL;
+ int64_t soft_lim_percent = 0, *ptr = NULL;
+ int ret = 0;
-int32_t
-quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+ if ((this == NULL) || (dict == NULL) || (hard_lim == NULL)
+ || (soft_lim == NULL))
+ goto out;
+
+ priv = this->private;
+
+ ret = dict_get_bin (dict, QUOTA_LIMIT_KEY, (void **) &ptr);
+ limit = (quota_limit_t *)ptr;
+
+ if (limit) {
+ *hard_lim = ntoh64 (limit->hard_lim);
+ soft_lim_percent = ntoh64 (limit->soft_lim_percent);
+ }
+
+ if (soft_lim_percent < 0) {
+ soft_lim_percent = priv->default_soft_lim;
+ }
+
+ if ((*hard_lim > 0) && (soft_lim_percent > 0)) {
+ *soft_lim = (soft_lim_percent * (*hard_lim))/100;
+ }
+
+out:
+ return 0;
+}
+
+int
+quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict,
+ loc_t *loc, struct iatt *buf, int32_t *op_errno)
{
- int32_t ret = -1;
- char found = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
- int64_t *size = 0;
- uint64_t value = 0;
+ int32_t ret = -1;
+ char found = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ uint64_t value = 0;
+ int64_t hard_lim = -1, soft_lim = -1;
- local = frame->local;
+ quota_get_limits (this, dict, &hard_lim, &soft_lim);
inode_ctx_get (inode, this, &value);
ctx = (quota_inode_ctx_t *)(unsigned long)value;
- if ((op_ret < 0) || (local == NULL)
- || (((ctx == NULL) || (ctx->limit == local->limit))
- && (local->limit < 0) && !((IA_ISREG (buf->ia_type))
- || (IA_ISLNK (buf->ia_type))))) {
- goto unwind;
+ if ((((ctx == NULL) || (ctx->hard_lim == hard_lim))
+ && (hard_lim < 0) && !QUOTA_REG_OR_LNK_FILE (buf->ia_type))) {
+ ret = 0;
+ goto out;
}
- ret = quota_inode_ctx_get (local->loc.inode, local->limit, this, dict,
- buf, &ctx, 1);
+ ret = quota_inode_ctx_get (inode, this, &ctx, 1);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
"context in inode(gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
+ uuid_utoa (inode->gfid));
+ ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
}
LOCK (&ctx->lock);
{
-
- if (dict != NULL) {
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY,
- (void **) &size);
- if (ret == 0) {
- ctx->size = ntoh64 (*size);
- gettimeofday (&ctx->tv, NULL);
- }
- }
-
- if (local->limit != ctx->limit) {
- ctx->limit = local->limit;
- }
+ ctx->hard_lim = hard_lim;
+ ctx->soft_lim = soft_lim;
ctx->buf = *buf;
- if (!(IA_ISREG (buf->ia_type) || IA_ISLNK (buf->ia_type))) {
+ if (!QUOTA_REG_OR_LNK_FILE (buf->ia_type)) {
goto unlock;
}
+ if (loc->name == NULL)
+ goto unlock;
+
list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->loc.name) == 0) &&
- (uuid_compare (local->loc.parent->gfid,
+ if ((strcmp (dentry->name, loc->name) == 0) &&
+ (uuid_compare (loc->parent->gfid,
dentry->par) == 0)) {
found = 1;
break;
@@ -637,18 +1020,18 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!found) {
dentry = __quota_dentry_new (ctx,
- (char *)local->loc.name,
- local->loc.parent->gfid);
+ (char *)loc->name,
+ loc->parent->gfid);
if (dentry == NULL) {
/*
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"cannot create a new dentry (par:%"
PRId64", name:%s) for inode(ino:%"
PRId64", gfid:%s)",
uuid_utoa (local->loc.inode->gfid));
*/
- op_ret = -1;
- op_errno = ENOMEM;
+ ret = -1;
+ *op_errno = ENOMEM;
goto unlock;
}
}
@@ -656,6 +1039,25 @@ quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unlock:
UNLOCK (&ctx->lock);
+out:
+ return ret;
+}
+
+int32_t
+quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+
+ op_ret = quota_fill_inodectx (this, inode, dict, &local->loc, buf,
+ &op_errno);
+
unwind:
QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
dict, postparent);
@@ -667,141 +1069,64 @@ int32_t
quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xattr_req)
{
- int32_t ret = -1;
- int64_t limit = -1;
- limits_t *limit_node = NULL;
- gf_boolean_t dict_newed = _gf_false;
- quota_priv_t *priv = NULL;
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
priv = this->private;
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- if (strcmp (limit_node->path, loc->path) == 0) {
- limit = limit_node->value;
- }
- }
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
- local = quota_local_new ();
- if (local == NULL) {
+ xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+ if (!xattr_req)
goto err;
- }
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
+ local = quota_local_new ();
+ if (local == NULL) {
goto err;
}
frame->local = local;
+ loc_copy (&local->loc, loc);
- local->limit = limit;
-
- if (limit < 0) {
- goto wind;
- }
-
- if (xattr_req == NULL) {
- xattr_req = dict_new ();
- dict_newed = _gf_true;
- }
-
- ret = dict_set_uint64 (xattr_req, QUOTA_SIZE_KEY, 0);
+ ret = dict_set_int8 (xattr_req, QUOTA_LIMIT_KEY, 1);
if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict set of key for hard-limit failed");
goto err;
}
-wind:
STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
ret = 0;
err:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
if (ret < 0) {
QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM,
NULL, NULL, NULL, NULL);
}
- if (dict_newed == _gf_true) {
- dict_unref (xattr_req);
- }
-
return 0;
-}
-
-
-void
-quota_update_size (xlator_t *this, inode_t *inode, char *name, uuid_t par,
- int64_t delta)
-{
- inode_t *_inode = NULL;
- inode_t *parent = NULL;
- uint64_t value = 0;
- quota_inode_ctx_t *ctx = NULL;
- uuid_t trav_uuid = {0,};
-
- GF_VALIDATE_OR_GOTO ("quota", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
-
- inode_ctx_get (inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
-
- _inode = inode_ref (inode);
-
- uuid_copy (trav_uuid, par);
- do {
- if ((ctx != NULL) && (ctx->limit >= 0)) {
- LOCK (&ctx->lock);
- {
- ctx->size += delta;
- }
- UNLOCK (&ctx->lock);
- }
-
- if (__is_root_gfid (_inode->gfid)) {
- break;
- }
-
- parent = inode_parent (_inode, trav_uuid, name);
- if (parent == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot find parent for inode (gfid:%s), hence "
- "aborting size updation of parents",
- uuid_utoa (_inode->gfid));
- }
-
- if (name != NULL) {
- name = NULL;
- uuid_clear (trav_uuid);
- }
-
- inode_unref (_inode);
- _inode = parent;
-
- if (_inode == NULL) {
- break;
- }
- inode_ctx_get (_inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- } while (1);
-
-out:
- return;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ return 0;
}
-
int32_t
quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t ret = 0;
uint64_t ctx_int = 0;
quota_inode_ctx_t *ctx = NULL;
quota_local_t *local = NULL;
- quota_dentry_t *dentry = NULL;
- int64_t delta = 0;
local = frame->local;
@@ -831,14 +1156,9 @@ quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
UNLOCK (&ctx->lock);
- list_for_each_entry (dentry, &ctx->parents, next) {
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
- quota_update_size (this, local->loc.inode,
- dentry->name, dentry->par, delta);
- }
-
out:
- QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf);
+ QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -847,10 +1167,15 @@ out:
int32_t
quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+ struct iovec *new_vector = NULL;
+ int32_t new_count = 0;
+
+ priv = this->private;
local = frame->local;
if (local == NULL) {
@@ -860,16 +1185,42 @@ quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (local->op_ret == -1) {
op_errno = local->op_errno;
- goto unwind;
+
+ if ((op_errno == EDQUOT) && (local->space_available > 0)) {
+ new_count = iov_subset (vector, count, 0,
+ local->space_available, NULL);
+
+ new_vector = GF_CALLOC (new_count,
+ sizeof (struct iovec),
+ gf_common_mt_iovec);
+ if (new_vector == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ new_count = iov_subset (vector, count, 0,
+ local->space_available,
+ new_vector);
+
+ vector = new_vector;
+ count = new_count;
+ } else {
+ goto unwind;
+ }
}
- STACK_WIND (frame, quota_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- iobref);
+ STACK_WIND (frame, quota_writev_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
+ vector, count, off, flags, iobref, xdata);
+
+ if (new_vector != NULL)
+ GF_FREE (new_vector);
+
return 0;
unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -877,16 +1228,23 @@ unwind:
int32_t
quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
int32_t ret = -1, op_errno = EINVAL;
int32_t parents = 0;
uint64_t size = 0;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
+ quota_dentry_t *dentry = NULL, *tmp = NULL;
call_stub_t *stub = NULL;
- quota_dentry_t *dentry = NULL;
+ struct list_head head = {0, };
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ INIT_LIST_HEAD (&head);
GF_ASSERT (frame);
GF_VALIDATE_OR_GOTO ("quota", this, unwind);
@@ -900,16 +1258,17 @@ quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
frame->local = local;
local->loc.inode = inode_ref (fd->inode);
- ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
+ ret = quota_inode_ctx_get (fd->inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (fd->inode->gfid));
- goto unwind;
}
stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count,
- off, iobref);
+ off, flags, iobref, xdata);
if (stub == NULL) {
op_errno = ENOMEM;
goto unwind;
@@ -919,46 +1278,50 @@ quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
size = iov_length (vector, count);
- LOCK (&ctx->lock);
- {
- list_for_each_entry (dentry, &ctx->parents, next) {
- parents++;
- }
- }
- UNLOCK (&ctx->lock);
-
- local->delta = size;
- local->stub = stub;
- local->link_count = parents;
-
- list_for_each_entry (dentry, &ctx->parents, next) {
- ret = quota_check_limit (frame, fd->inode, this, dentry->name,
- dentry->par);
- if (ret == -1) {
- break;
+ if (ctx != NULL) {
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ tmp = __quota_dentry_new (NULL, dentry->name,
+ dentry->par);
+ list_add_tail (&tmp->next, &head);
+ parents++;
+ }
}
+ UNLOCK (&ctx->lock);
}
- stub = NULL;
-
LOCK (&local->lock);
{
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
+ local->delta = size;
+ local->link_count = (parents != 0) ? parents : 1;
+ local->stub = stub;
}
UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
+ if (parents == 0) {
+ /* nameless lookup on this inode, allow quota to reconstruct
+ * ancestry as part of check_limit.
+ */
+ quota_check_limit (frame, fd->inode, this, NULL, NULL);
+ } else {
+ list_for_each_entry_safe (dentry, tmp, &head, next) {
+ quota_check_limit (frame, fd->inode, this, dentry->name,
+ dentry->par);
+ __quota_dentry_free (dentry);
+ }
}
return 0;
unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL);
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd,
+ vector, count, off, flags, iobref, xdata);
return 0;
}
@@ -967,17 +1330,17 @@ int32_t
quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
return 0;
}
int32_t
quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dict_t *params)
+ mode_t mode, mode_t umask, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
@@ -994,24 +1357,31 @@ quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto unwind;
}
- STACK_WIND (frame, quota_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, params);
+ STACK_WIND (frame, quota_mkdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+ mode, umask, xdata);
+
return 0;
unwind:
QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
- int32_t ret = 0, op_errno = 0;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = 0, op_errno = 0;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
local = quota_local_new ();
if (local == NULL) {
@@ -1021,8 +1391,6 @@ quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
frame->local = local;
- local->link_count = 1;
-
ret = loc_copy (&local->loc, loc);
if (ret) {
op_errno = ENOMEM;
@@ -1030,38 +1398,34 @@ quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto err;
}
- stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, params);
+ stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask,
+ xdata);
if (stub == NULL) {
op_errno = ENOMEM;
goto err;
}
- local->stub = stub;
- local->delta = 0;
-
- quota_check_limit (frame, loc->parent, this, NULL, 0);
-
- stub = NULL;
-
LOCK (&local->lock);
{
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
-
- local->link_count = 0;
+ local->stub = stub;
+ local->delta = 0;
+ local->link_count = 1;
}
UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
- }
-
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
return 0;
+
err:
QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
return 0;
}
@@ -1071,7 +1435,7 @@ int32_t
quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1083,7 +1447,7 @@ quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
+ ret = quota_inode_ctx_get (inode, this, &ctx, 1);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
"context in inode(gfid:%s)",
@@ -1114,19 +1478,24 @@ unlock:
unwind:
QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
int32_t
quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd, dict_t *params)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
local = frame->local;
+
+ priv = this->private;
+
if (local == NULL) {
gf_log (this->name, GF_LOG_WARNING, "local is NULL");
goto unwind;
@@ -1137,28 +1506,36 @@ quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto unwind;
}
- STACK_WIND (frame, quota_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd,
- params);
+
+ STACK_WIND (frame, quota_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, loc,
+ flags, mode, umask, fd, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
int32_t
quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ int32_t op_errno = 0;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
local = quota_local_new ();
if (local == NULL) {
+ op_errno = ENOMEM;
goto err;
}
@@ -1167,42 +1544,36 @@ quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = loc_copy (&local->loc, loc);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ op_errno = ENOMEM;
goto err;
}
stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode,
- fd, params);
+ umask, fd, xdata);
if (stub == NULL) {
goto err;
}
- local->link_count = 1;
- local->stub = stub;
- local->delta = 0;
-
- quota_check_limit (frame, loc->parent, this, NULL, 0);
-
- stub = NULL;
-
LOCK (&local->lock);
{
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
}
UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
- }
-
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
return 0;
err:
- QUOTA_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create, loc,
+ flags, mode, umask, fd, xdata);
return 0;
}
@@ -1210,11 +1581,13 @@ err:
int32_t
quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
uint64_t value = 0;
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *old_dentry = NULL;
if (op_ret < 0) {
goto out;
@@ -1232,23 +1605,40 @@ quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_update_size (this, local->loc.inode, (char *)local->loc.name,
- local->loc.parent->gfid,
- (-(ctx->buf.ia_blocks * 512)));
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
+ old_dentry = dentry;
+ break;
+ }
+ }
+ if (old_dentry)
+ __quota_dentry_free (old_dentry);
+ }
+ UNLOCK (&ctx->lock);
out:
QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
- postparent);
+ postparent, xdata);
return 0;
}
int32_t
-quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int32_t ret = 0;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto err;
@@ -1256,6 +1646,10 @@ quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
+ if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ local->skip_check = _gf_true;
+ }
+
ret = loc_copy (&local->loc, loc);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
@@ -1263,16 +1657,21 @@ quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
ret = 0;
err:
if (ret == -1) {
- QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL);
+ QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL);
}
return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
@@ -1280,7 +1679,7 @@ int32_t
quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -1294,16 +1693,16 @@ quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = (quota_local_t *) frame->local;
- quota_update_size (this, local->loc.parent, NULL, 0,
- (buf->ia_blocks * 512));
+ if (local->skip_check)
+ goto out;
- ret = quota_inode_ctx_get (inode, -1, this, NULL, NULL, &ctx, 0);
+ ret = quota_inode_ctx_get (inode, this, &ctx, 0);
if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot find quota "
- "context in %s (gfid:%s)", local->loc.path,
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (inode->gfid));
- op_ret = -1;
- op_errno = EINVAL;
goto out;
}
@@ -1345,7 +1744,7 @@ unlock:
out:
QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1353,10 +1752,13 @@ out:
int32_t
quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+
+ priv = this->private;
local = frame->local;
if (local == NULL) {
@@ -1370,24 +1772,44 @@ quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto unwind;
}
- STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
+ STACK_WIND (frame, quota_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc,
+ newloc, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int32_t
-quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
int32_t ret = -1, op_errno = ENOMEM;
quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
call_stub_t *stub = NULL;
- quota_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ goto off;
+ }
+
+ quota_inode_ctx_get (oldloc->inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
+ uuid_utoa (oldloc->inode->gfid));
+ }
local = quota_local_new ();
if (local == NULL) {
@@ -1396,58 +1818,38 @@ quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
frame->local = (void *) local;
+
ret = loc_copy (&local->loc, newloc);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
goto err;
}
- stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc);
+ stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata);
if (stub == NULL) {
goto err;
}
- local->link_count = 1;
- local->stub = stub;
-
- ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL, &ctx,
- 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- oldloc->inode ? uuid_utoa (oldloc->inode->gfid) : "0");
- op_errno = EINVAL;
- goto err;
- }
-
- local->delta = ctx->buf.ia_blocks * 512;
-
- quota_check_limit (frame, newloc->parent, this, NULL, 0);
-
- stub = NULL;
-
LOCK (&local->lock);
{
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
-
- local->link_count = 0;
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = (ctx != NULL) ? ctx->buf.ia_blocks * 512 : 0;
}
UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
- }
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+ return 0;
- ret = 0;
err:
- if (ret < 0) {
- QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
- }
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc,
+ newloc, xdata);
return 0;
}
@@ -1456,14 +1858,15 @@ int32_t
quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
int32_t ret = -1;
+ int64_t size = 0;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
quota_dentry_t *old_dentry = NULL, *dentry = NULL;
char new_dentry_found = 0;
- int64_t size = 0;
if (op_ret < 0) {
goto out;
@@ -1477,29 +1880,22 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- if (IA_ISREG (local->oldloc.inode->ia_type)
- || IA_ISLNK (local->oldloc.inode->ia_type)) {
+ if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) {
size = buf->ia_blocks * 512;
}
- if (local->oldloc.parent != local->newloc.parent) {
- quota_update_size (this, local->oldloc.parent, NULL, 0, (-size));
- quota_update_size (this, local->newloc.parent, NULL, 0, size);
- }
-
- if (!(IA_ISREG (local->oldloc.inode->ia_type)
- || IA_ISLNK (local->oldloc.inode->ia_type))) {
+ if (!QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) {
goto out;
}
- ret = quota_inode_ctx_get (local->oldloc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0);
if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "quota context not"
- "set in inode(gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->oldloc.inode->gfid));
- op_ret = -1;
- op_errno = EINVAL;
+
goto out;
}
@@ -1545,7 +1941,8 @@ quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (dentry == NULL) {
gf_log (this->name, GF_LOG_WARNING,
"cannot create a new dentry (name:%s) "
- "for inode(gfid:%s)", local->newloc.name,
+ "for inode(gfid:%s)",
+ local->newloc.name,
uuid_utoa (local->newloc.inode->gfid));
op_ret = -1;
op_errno = ENOMEM;
@@ -1560,7 +1957,7 @@ unlock:
out:
QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
+ postoldparent, prenewparent, postnewparent, xdata);
return 0;
}
@@ -1568,10 +1965,13 @@ out:
int32_t
quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+
+ priv = this->private;
local = frame->local;
if (local == NULL) {
@@ -1585,25 +1985,72 @@ quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto unwind;
}
- STACK_WIND (frame, quota_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
+ STACK_WIND (frame, quota_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc,
+ newloc, xdata);
+
return 0;
unwind:
QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
return 0;
}
+static int32_t
+quota_rename_get_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ int64_t *size = 0;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, out, op_errno,
+ EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, out, op_errno,
+ EINVAL);
+ local = frame->local;
+ GF_ASSERT (local);
+ local->link_count = 1;
+
+ if (op_ret < 0)
+ goto out;
+
+
+ ret = dict_get_bin (xdata, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "size key not present in dict");
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->delta = ntoh64 (*size);
+ quota_check_limit (frame, local->newloc.parent, this,
+ NULL, NULL);
+ return 0;
+
+out:
+ quota_handle_validate_error (local, -1, op_errno);
+ return 0;
+}
+
int32_t
quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
- int32_t ret = -1, op_errno = ENOMEM;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1, op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
local = quota_local_new ();
if (local == NULL) {
@@ -1624,56 +2071,69 @@ quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto err;
}
- stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc);
+ stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
+ xdata);
if (stub == NULL) {
goto err;
}
- local->link_count = 1;
- local->stub = stub;
+ LOCK (&local->lock);
+ {
+ local->link_count = 1;
+ local->stub = stub;
+ }
+ UNLOCK (&local->lock);
- if (IA_ISREG (oldloc->inode->ia_type)
- || IA_ISLNK (oldloc->inode->ia_type)) {
- ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL,
- &ctx, 0);
+ if (QUOTA_REG_OR_LNK_FILE (oldloc->inode->ia_type)) {
+ ret = quota_inode_ctx_get (oldloc->inode, this, &ctx, 0);
if (ctx == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ "quota context not set in inode (gfid:%s), "
+ "considering file size as zero while enforcing "
+ "quota on new ancestry",
oldloc->inode ? uuid_utoa (oldloc->inode->gfid)
: "0");
- op_errno = EINVAL;
- goto err;
- }
- local->delta = ctx->buf.ia_blocks * 512;
- } else {
- local->delta = 0;
- }
+ local->delta = 0;
- quota_check_limit (frame, newloc->parent, this, NULL, 0);
+ } else {
- stub = NULL;
+ /* FIXME: We need to account for the size occupied by this
+ * inode on the target directory. To avoid double
+ * accounting, we need to modify enforcer to perform
+ * quota_check_limit only uptil the least common ancestor
+ * directory inode*/
- LOCK (&local->lock);
- {
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
+ /* FIXME: The following code assumes that regular files and
+ *linkfiles are present, in their entirety, in a single
+ brick. This *assumption is invalid in the case of
+ stripe.*/
+
+ local->delta = ctx->buf.ia_blocks * 512;
}
- local->link_count = 0;
- }
- UNLOCK (&local->lock);
+ } else if (IA_ISDIR (oldloc->inode->ia_type)) {
+ ret = quota_validate (frame, oldloc->inode, this,
+ quota_rename_get_size_cbk);
+ if (ret){
+ op_errno = -ret;
+ goto err;
+ }
- if (stub != NULL) {
- call_resume (stub);
+ return 0;
}
- ret = 0;
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+ return 0;
+
err:
- if (ret == -1) {
- QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL);
- }
+ QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc,
+ newloc, xdata);
return 0;
}
@@ -1683,9 +2143,8 @@ int32_t
quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
- int64_t size = 0;
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
quota_dentry_t *dentry = NULL;
@@ -1695,16 +2154,15 @@ quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
- size = buf->ia_blocks * 512;
- quota_update_size (this, local->loc.parent, NULL, 0, size);
-
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 1);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 1);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
+
goto out;
}
@@ -1727,7 +2185,7 @@ quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
+ preparent, postparent, xdata);
return 0;
}
@@ -1735,10 +2193,11 @@ out:
int
quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
local = frame->local;
if (local == NULL) {
@@ -1746,30 +2205,38 @@ quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
goto unwind;
}
+ priv = this->private;
+
if (local->op_ret == -1) {
op_errno = local->op_errno;
goto unwind;
}
- STACK_WIND (frame, quota_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, params);
+ STACK_WIND (frame, quota_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int
quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t ret = -1;
- int32_t op_errno = ENOMEM;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
local = quota_local_new ();
if (local == NULL) {
@@ -1784,53 +2251,43 @@ quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
goto err;
}
- local->link_count = 1;
-
stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc,
- params);
+ umask, xdata);
if (stub == NULL) {
goto err;
}
- local->stub = stub;
- local->delta = strlen (linkpath);
-
- quota_check_limit (frame, loc->parent, this, NULL, 0);
-
- stub = NULL;
-
LOCK (&local->lock);
{
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
-
- local->link_count = 0;
+ local->stub = stub;
+ local->delta = strlen (linkpath);
+ local->link_count = 1;
}
UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
- }
-
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
return 0;
err:
QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ NULL, NULL);
return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
}
int32_t
quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
- int64_t delta = 0;
quota_inode_ctx_t *ctx = NULL;
if (op_ret < 0) {
@@ -1843,15 +2300,12 @@ quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
-
- quota_update_size (this, local->loc.inode, NULL, 0, delta);
-
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -1864,17 +2318,23 @@ quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
return 0;
}
int32_t
-quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
int32_t ret = -1;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto err;
@@ -1889,23 +2349,27 @@ quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
}
STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
+
err:
- QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
int32_t
quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
- int64_t delta = 0;
quota_inode_ctx_t *ctx = NULL;
if (op_ret < 0) {
@@ -1918,15 +2382,12 @@ quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
-
- quota_update_size (this, local->loc.inode, NULL, 0, delta);
-
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -1939,16 +2400,22 @@ quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf);
+ postbuf, xdata);
return 0;
}
int32_t
-quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL)
goto err;
@@ -1957,13 +2424,20 @@ quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
local->loc.inode = inode_ref (fd->inode);
- STACK_WIND (frame, quota_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
+ STACK_WIND (frame, quota_ftruncate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd,
+ offset, xdata);
return 0;
err:
- QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd,
+ offset, xdata);
return 0;
}
@@ -1977,14 +2451,23 @@ quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
dict_t *dict = NULL;
quota_inode_ctx_t *ctx = NULL;
uint64_t value = 0;
+ quota_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->is_quota_on) {
+ snprintf (dir_limit, 1024, "Quota is disabled please turn on");
+ goto dict_set;
+ }
ret = inode_ctx_get (inode, this, &value);
if (ret < 0)
goto out;
ctx = (quota_inode_ctx_t *)(unsigned long)value;
- snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size, ctx->limit);
+ snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size,
+ ctx->hard_lim);
+dict_set:
dict = dict_new ();
if (dict == NULL) {
ret = -1;
@@ -1995,9 +2478,9 @@ quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
if (ret < 0)
goto out;
- gf_log (this->name, GF_LOG_INFO, "str = %s", dir_limit);
+ gf_log (this->name, GF_LOG_DEBUG, "str = %s", dir_limit);
- QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict);
+ QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL);
ret = 0;
@@ -2008,11 +2491,11 @@ out:
int32_t
quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
- if (strcasecmp (name, "trusted.limit.list") == 0) {
+ if (name && strcasecmp (name, "trusted.limit.list") == 0) {
ret = quota_send_dir_limit_to_cli (frame, this, fd->inode,
name);
if (ret == 0) {
@@ -2021,14 +2504,14 @@ quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name);
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
return 0;
}
int32_t
quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
int32_t ret = 0;
@@ -2040,14 +2523,15 @@ quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
}
int32_t
quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2062,12 +2546,17 @@ quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
+ if (!IA_ISDIR (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler "
+ "has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
goto out;
}
@@ -2079,17 +2568,22 @@ quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf);
+ QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int32_t
-quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
int32_t ret = -1;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2103,18 +2597,26 @@ quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
+ FIRST_CHILD(this)->fops->stat, loc,
+ xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL);
+ QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc,
+ xdata);
return 0;
}
int32_t
quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2129,12 +2631,17 @@ quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
+ if (!IA_ISDIR (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler "
+ "has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
goto out;
}
@@ -2146,16 +2653,21 @@ quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf);
+ QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int32_t
-quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2165,12 +2677,19 @@ quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->loc.inode = inode_ref (fd->inode);
- STACK_WIND (frame, quota_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
+ STACK_WIND (frame, quota_fstat_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd,
+ xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL);
+ QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd,
+ xdata);
return 0;
}
@@ -2178,7 +2697,7 @@ unwind:
int32_t
quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf)
+ struct iatt *buf, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2193,11 +2712,12 @@ quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2209,17 +2729,24 @@ quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf);
+ QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf,
+ xdata);
return 0;
}
int32_t
-quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
int32_t ret = -1;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2233,12 +2760,19 @@ quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
goto unwind;
}
- STACK_WIND (frame, quota_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
+ STACK_WIND (frame, quota_readlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, loc,
+ size, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc,
+ size, xdata);
return 0;
}
@@ -2246,7 +2780,8 @@ unwind:
int32_t
quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref)
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2261,11 +2796,12 @@ quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2278,17 +2814,22 @@ quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref);
+ buf, iobref, xdata);
return 0;
}
int32_t
quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2298,12 +2839,20 @@ quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref (fd->inode);
- STACK_WIND (frame, quota_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset);
+ STACK_WIND (frame, quota_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd,
+ size, offset, flags, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL);
+ QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL,
+ NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd,
+ size, offset, flags, xdata);
return 0;
}
@@ -2311,7 +2860,7 @@ unwind:
int32_t
quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+ struct iatt *postbuf, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2326,11 +2875,12 @@ quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is NULL on "
+ "inode (%s). "
+ "If quota is not enabled recently and crawler has "
+ "finished crawling, its an error",
uuid_utoa (local->loc.inode->gfid));
goto out;
}
@@ -2342,16 +2892,23 @@ quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK (&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf);
+ QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
int32_t
-quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2362,20 +2919,26 @@ quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
frame->local = local;
STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
+ FIRST_CHILD(this)->fops->fsync, fd,
+ flags, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd,
+ flags, xdata);
+ return 0;
}
int32_t
quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2390,12 +2953,16 @@ quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
+ if (!IA_ISDIR (statpost->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is "
+ "NULL on inode (%s). "
+ "If quota is not enabled recently and crawler "
+ "has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
goto out;
}
@@ -2408,18 +2975,23 @@ quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
return 0;
}
int32_t
quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
int32_t ret = -1;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2434,11 +3006,18 @@ quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid);
+ FIRST_CHILD (this)->fops->setattr, loc,
+ stbuf, valid, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc,
+ stbuf, valid, xdata);
return 0;
}
@@ -2446,7 +3025,7 @@ unwind:
int32_t
quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
+ struct iatt *statpost, dict_t *xdata)
{
quota_local_t *local = NULL;
quota_inode_ctx_t *ctx = NULL;
@@ -2461,12 +3040,16 @@ quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
+ quota_inode_ctx_get (local->loc.inode, this, &ctx, 0);
if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
+ if (!IA_ISDIR (statpost->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is "
+ "NULL on inode (%s). "
+ "If quota is not enabled recently and crawler "
+ "has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
goto out;
}
@@ -2478,17 +3061,22 @@ quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
- statpost);
+ statpost, xdata);
return 0;
}
int32_t
quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
+ quota_priv_t *priv = NULL;
quota_local_t *local = NULL;
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
local = quota_local_new ();
if (local == NULL) {
goto unwind;
@@ -2498,12 +3086,19 @@ quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->loc.inode = inode_ref (fd->inode);
- STACK_WIND (frame, quota_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid);
+ STACK_WIND (frame, quota_fsetattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, fd,
+ stbuf, valid, xdata);
return 0;
unwind:
- QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL);
+ QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd,
+ stbuf, valid, xdata);
return 0;
}
@@ -2512,7 +3107,7 @@ int32_t
quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent)
+ struct iatt *postparent, dict_t *xdata)
{
int32_t ret = -1;
quota_local_t *local = NULL;
@@ -2524,7 +3119,7 @@ quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
+ ret = quota_inode_ctx_get (inode, this, &ctx, 1);
if ((ret == -1) || (ctx == NULL)) {
gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
"context in inode (gfid:%s)", uuid_utoa (inode->gfid));
@@ -2554,17 +3149,18 @@ unlock:
unwind:
QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent);
+ buf, preparent, postparent, xdata);
return 0;
}
int
quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, dict_t *parms)
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
quota_local_t *local = NULL;
int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
local = frame->local;
if (local == NULL) {
@@ -2572,30 +3168,38 @@ quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto unwind;
}
+ priv = this->private;
+
if (local->op_ret == -1) {
op_errno = local->op_errno;
goto unwind;
}
- STACK_WIND (frame, quota_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, parms);
+ STACK_WIND (frame, quota_mknod_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc,
+ mode, rdev, umask, xdata);
return 0;
unwind:
QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
return 0;
}
int
quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *parms)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
local = quota_local_new ();
if (local == NULL) {
@@ -2611,136 +3215,861 @@ quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev,
- parms);
+ umask, xdata);
if (stub == NULL) {
goto err;
}
- local->link_count = 1;
- local->stub = stub;
- local->delta = 0;
+ LOCK (&local->lock);
+ {
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
+ }
+ UNLOCK (&local->lock);
- quota_check_limit (frame, loc->parent, this, NULL, 0);
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
+ return 0;
- stub = NULL;
+err:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
- LOCK (&local->lock);
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc,
+ mode, rdev, umask, xdata);
+ return 0;
+}
+
+int
+quota_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ ret = quota_inode_ctx_get (local->loc.inode, this, &ctx, 1);
+ if ((ret < 0) || (ctx == NULL)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
{
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
+ ctx->hard_lim = local->limit.hard_lim;
+ ctx->soft_lim = local->limit.soft_lim_percent;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int op_errno = EINVAL;
+ int op_ret = -1;
+ int64_t hard_lim = -1, soft_lim = -1;
+ quota_local_t *local = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ if (frame->root->pid >= 0) {
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict, op_errno,
+ err);
+ }
+
+ quota_get_limits (this, dict, &hard_lim, &soft_lim);
+
+ if (hard_lim > 0) {
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ local->limit.hard_lim = hard_lim;
+ local->limit.soft_lim_percent = soft_lim;
+ }
+
+ STACK_WIND (frame, quota_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc,
+ dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc,
+ dict, flags, xdata);
+ return 0;
+}
+
+int
+quota_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_ret = quota_inode_ctx_get (local->loc.inode, this, &ctx, 1);
+ if ((op_ret < 0) || (ctx == NULL)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->hard_lim = local->limit.hard_lim;
+ ctx->soft_lim = local->limit.soft_lim_percent;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int64_t hard_lim = -1, soft_lim = -1;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ if (0 <= frame->root->pid) {
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*",
+ dict, op_errno, err);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict,
+ op_errno, err);
+ }
+
+ quota_get_limits (this, dict, &hard_lim, &soft_lim);
+
+ if (hard_lim > 0) {
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
}
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ local->limit.hard_lim = hard_lim;
+ local->limit.soft_lim_percent = soft_lim;
}
- UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
+ STACK_WIND (frame, quota_fsetxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd,
+ dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd,
+ dict, flags, xdata);
+ return 0;
+}
+
+
+int
+quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (this, err);
+
+ /* all quota xattrs can be cleaned up by doing setxattr on special key.
+ * Hence its ok that we don't allow removexattr on quota keys here.
+ */
+ if (frame->root->pid >= 0) {
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*",
+ name, op_errno, err);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name,
+ op_errno, err);
}
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, quota_removexattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ if (frame->root->pid >= 0) {
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*",
+ name, op_errno, err);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name,
+ op_errno, err);
+ }
+ STACK_WIND (frame, quota_fremovexattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
err:
- QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
int32_t
quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
- inode_t *root_inode = NULL;
- quota_priv_t *priv = NULL;
- uint64_t value = 0;
- quota_inode_ctx_t *ctx = NULL;
- limits_t *limit_node = NULL;
- int64_t usage = -1;
- int64_t avail = -1;
+ inode_t *inode = NULL;
+ uint64_t value = 0;
+ int64_t usage = -1;
+ int64_t avail = -1;
+ int64_t blocks = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ gf_boolean_t dict_created = _gf_false;
+
+ inode = cookie;
- root_inode = cookie;
+ /* This fop will fail mostly in case of client disconnect's,
+ * which is already logged. Hence, not logging here */
+ if (op_ret == -1)
+ goto unwind;
/*
* We should never get here unless quota_statfs (below) sent us a
* cookie, and it would only do so if the value was non-NULL. This
* check is therefore just routine defensive coding.
*/
- if (!root_inode) {
+ if (!inode) {
gf_log(this->name,GF_LOG_WARNING,
"null inode, cannot adjust for quota");
goto unwind;
}
- if (!root_inode->table || (root_inode != root_inode->table->root)) {
- gf_log(this->name,GF_LOG_WARNING,
- "non-root inode, cannot adjust for quota");
- goto unwind;
- }
- inode_ctx_get (root_inode, this, &value);
+ inode_ctx_get (inode, this, &value);
if (!value) {
goto unwind;
}
+
+ /* if limit is set on this inode, report statfs based on this inode
+ * else report based on root.
+ */
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if (ctx->hard_lim <= 0) {
+ inode_ctx_get (inode->table->root, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long) value;
+ if (!ctx || ctx->hard_lim < 0)
+ goto unwind;
+ }
+
+ { /* statfs is adjusted in this code block */
+ usage = (ctx->size) / buf->f_bsize;
+
+ blocks = ctx->hard_lim / buf->f_bsize;
+ buf->f_blocks = blocks;
+
+ avail = buf->f_blocks - usage;
+ avail = max (avail, 0);
+
+ buf->f_bfree = avail;
+ /*
+ * We have to assume that the total assigned quota
+ * won't cause us to dip into the reserved space,
+ * because dealing with the overcommitted cases is
+ * just too hairy (especially when different bricks
+ * might be using different reserved percentages and
+ * such).
+ */
+ buf->f_bavail = buf->f_bfree;
+ }
+
+ if (!xdata) {
+ xdata = dict_new ();
+ if (!xdata)
+ goto unwind;
+ dict_created = _gf_true;
+ }
+
+ ret = dict_set_int8 (xdata, "quota-deem-statfs", 1);
+ if (-1 == ret)
+ gf_log (this->name, GF_LOG_ERROR, "Dict set failed, "
+ "deem-statfs option may have no effect");
+
+unwind:
+ QUOTA_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
+
+ if (dict_created)
+ dict_unref (xdata);
+ return 0;
+}
+
+
+int32_t
+quota_statfs_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO ("quota", (local = frame->local), err);
+
+ if (-1 == local->op_ret) {
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ STACK_WIND_COOKIE (frame, quota_statfs_cbk, loc->inode,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+quota_statfs_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int64_t *size = 0;
+ uint64_t value = 0;
+
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto resume;
+
+ GF_ASSERT (local);
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, resume, op_errno,
+ EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, resume, op_errno,
+ EINVAL);
+
+ ret = inode_ctx_get (local->validate_loc.inode, this, &value);
+
ctx = (quota_inode_ctx_t *)(unsigned long)value;
- usage = (ctx->size) / buf->f_bsize;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context is not present in inode (gfid:%s)",
+ uuid_utoa (local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto resume;
+ }
+
+ ret = dict_get_bin (xdata, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "size key not present in dict");
+ op_errno = EINVAL;
+ goto resume;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->size = ntoh64 (*size);
+ gettimeofday (&ctx->tv, NULL);
+ }
+ UNLOCK (&ctx->lock);
+
+resume:
+ quota_link_count_decrement (local);
+ return 0;
+}
+
+int32_t
+quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int op_errno = 0;
+ call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+
priv = this->private;
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- /* Notice that this only works for volume-level quota. */
- if (strcmp (limit_node->path, "/") == 0) {
- buf->f_blocks = limit_node->value / buf->f_bsize;
- avail = buf->f_blocks - usage;
- if (buf->f_bfree > avail) {
- buf->f_bfree = avail;
- }
- /*
- * We have to assume that the total assigned quota
- * won't cause us to dip into the reserved space,
- * because dealing with the overcommitted cases is
- * just too hairy (especially when different bricks
- * might be using different reserved percentages and
- * such).
- */
- buf->f_bavail = buf->f_bfree;
- break;
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ if (priv->consider_statfs && loc->inode) {
+ local = quota_local_new ();
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+
+ stub = fop_statfs_stub (frame, quota_statfs_helper, loc, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ LOCK (&local->lock);
+ {
+ local->inode = inode_ref (loc->inode);
+ local->link_count = 1;
+ local->stub = stub;
+ }
+ UNLOCK (&local->lock);
+
+ ret = quota_validate (frame, local->inode, this,
+ quota_statfs_validate_cbk);
+ if (0 > ret) {
+ quota_handle_validate_error (local, -1, -ret);
}
+
+ return 0;
+ }
+
+ /*
+ * We have to make sure that we never get to quota_statfs_cbk
+ * with a cookie that points to something other than an inode,
+ * which is exactly what would happen with STACK_UNWIND using
+ * that as a callback. Therefore, use default_statfs_cbk in
+ * this case instead.
+ *
+ * Also if the option deem-statfs is not set to "on" don't
+ * bother calculating quota limit on / in statfs_cbk.
+ */
+ if (priv->consider_statfs)
+ gf_log (this->name,GF_LOG_WARNING,
+ "missing inode, cannot adjust for quota");
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, NULL);
+
+ if (local)
+ quota_local_cleanup (this, local);
+ return 0;
+}
+
+int
+quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ quota_local_t *local = NULL;
+ loc_t loc = {0, };
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ local = frame->local;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if ((strcmp (entry->d_name, ".") == 0)
+ || (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ uuid_copy (loc.gfid, entry->d_stat.ia_gfid);
+ loc.inode = inode_ref (entry->inode);
+ loc.parent = inode_ref (local->loc.inode);
+ uuid_copy (loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ quota_fill_inodectx (this, entry->inode, entry->dict,
+ &loc, &entry->d_stat, &op_errno);
+
+ loc_wipe (&loc);
}
unwind:
- if (root_inode) {
- inode_unref(root_inode);
- }
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf);
+ QUOTA_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata);
+
return 0;
}
+int
+quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+ gf_boolean_t new_dict = _gf_false;
+ quota_local_t *local = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ local = quota_local_new ();
+
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ if (dict == NULL) {
+ dict = dict_new ();
+ new_dict = _gf_true;
+ }
+
+ if (dict) {
+ ret = dict_set_int8 (dict, QUOTA_LIMIT_KEY, 1);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict set of key for hard-limit failed");
+ goto err;
+ }
+ }
+
+ STACK_WIND (frame, quota_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd,
+ size, offset, dict);
+
+ if (new_dict) {
+ dict_unref (dict);
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL);
+
+ if (new_dict) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd,
+ size, offset, dict);
+ return 0;
+}
int32_t
-quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- inode_t *root_inode = NULL;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
- if (loc->inode) {
- root_inode = loc->inode->table->root;
- inode_ref(root_inode);
- STACK_WIND_COOKIE (frame, quota_statfs_cbk, root_inode,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc);
- }
- else {
- /*
- * We have to make sure that we never get to quota_statfs_cbk
- * with a cookie that points to something other than an inode,
- * which is exactly what would happen with STACK_UNWIND using
- * that as a callback. Therefore, use default_statfs_cbk in
- * this case instead.
- */
- gf_log(this->name,GF_LOG_WARNING,
- "missing inode, cannot adjust for quota");
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc);
- }
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int32_t
+quota_fallocate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
+
+ priv = this->private;
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_fallocate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = EINVAL;
+ int32_t parents = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_dentry_t *dentry = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ WIND_IF_QUOTAOFF (priv->is_quota_on, off);
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ ret = quota_inode_ctx_get (fd->inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG, "quota context is "
+ "NULL on inode (%s). "
+ "If quota is not enabled recently and crawler "
+ "has finished crawling, its an error",
+ uuid_utoa (local->loc.inode->gfid));
+ }
+
+ stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode,
+ offset, len, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ if (ctx != NULL) {
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ parents++;
+ }
+ }
+ UNLOCK (&ctx->lock);
+ }
+
+ /*
+ * Note that by using len as the delta we're assuming the range from
+ * offset to offset+len has not already been allocated. This can result
+ * in ENOSPC errors attempting to allocate an already allocated range.
+ */
+ local->delta = len;
+ local->stub = stub;
+ local->link_count = parents;
+
+ if (parents == 0) {
+ local->link_count = 1;
+ quota_check_limit (frame, fd->inode, this, NULL, NULL);
+ } else {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ quota_check_limit (frame, fd->inode, this, dentry->name,
+ dentry->par);
+ }
+ }
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset,
+ len, xdata);
return 0;
}
+/* Logs if
+* i. Usage crossed soft limit
+* ii. Usage above soft limit and alert-time elapsed
+*/
+void
+quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+ int64_t delta)
+{
+ struct timeval cur_time = {0,};
+ char *usage_str = NULL;
+ char size_str[32] = {0};
+ char *path = NULL;
+ int64_t cur_size = 0;
+ quota_priv_t *priv = NULL;
+ gf_boolean_t dyn_mem = _gf_true;
+
+ priv = this->private;
+ if ((ctx->soft_lim <= 0) || (timerisset (&ctx->prev_log) &&
+ !quota_timeout (&ctx->prev_log,
+ priv->log_timeout))) {
+ return;
+ }
+
+
+ cur_size = ctx->size + delta;
+ usage_str = gf_uint64_2human_readable (cur_size);
+ if (!usage_str) {
+ snprintf (size_str, sizeof (size_str), "%"PRId64, cur_size);
+ usage_str = (char*) size_str;
+ dyn_mem = _gf_false;
+ }
+ inode_path (inode, NULL, &path);
+ if (!path)
+ path = uuid_utoa (inode->gfid);
+
+ gettimeofday (&cur_time, NULL);
+ /* Usage crossed/reached soft limit */
+ if (DID_REACH_LIMIT (ctx->soft_lim, ctx->size, cur_size)) {
+
+ gf_log (this->name, GF_LOG_ALERT, "Usage crossed "
+ "soft limit: %s used by %s", usage_str, path);
+ ctx->prev_log = cur_time;
+ }
+ /* Usage is above soft limit */
+ else if (cur_size > ctx->soft_lim){
+ gf_log (this->name, GF_LOG_ALERT, "Usage is above "
+ "soft limit: %s used by %s", usage_str, path);
+ ctx->prev_log = cur_time;
+ }
+
+ if (dyn_mem)
+ GF_FREE (usage_str);
+}
int32_t
mem_acct_init (xlator_t *this)
@@ -2793,59 +4122,6 @@ quota_forget (xlator_t *this, inode_t *inode)
return 0;
}
-
-int
-quota_parse_limits (quota_priv_t *priv, xlator_t *this, dict_t *xl_options)
-{
- int32_t ret = -1;
- char *str = NULL;
- char *str_val = NULL;
- char *path = NULL;
- uint64_t value = 0;
- limits_t *quota_lim = NULL;
-
- ret = dict_get_str (xl_options, "limit-set", &str);
-
- if (str) {
- path = strtok (str, ":");
-
- while (path) {
- str_val = strtok (NULL, ",");
-
- ret = gf_string2bytesize (str_val, &value);
- if (ret != 0)
- goto err;
-
- QUOTA_ALLOC_OR_GOTO (quota_lim, limits_t, err);
-
- quota_lim->path = path;
-
- quota_lim->value = value;
-
- gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
- quota_lim->path, quota_lim->value);
-
- list_add_tail (&quota_lim->limit_list,
- &priv->limit_head);
-
- path = strtok (NULL, ":");
- }
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "no \"limit-set\" option provided");
- }
-
- list_for_each_entry (quota_lim, &priv->limit_head, limit_list) {
- gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64, quota_lim->path,
- quota_lim->value);
- }
-
- ret = 0;
-err:
- return ret;
-}
-
-
int32_t
init (xlator_t *this)
{
@@ -2867,55 +4143,123 @@ init (xlator_t *this)
QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err);
- INIT_LIST_HEAD (&priv->limit_head);
+ LOCK_INIT (&priv->lock);
this->private = priv;
- ret = quota_parse_limits (priv, this, this->options);
-
- if (ret) {
+ GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err);
+ GF_OPTION_INIT ("server-quota", priv->is_quota_on, bool, err);
+ GF_OPTION_INIT ("default-soft-limit", priv->default_soft_lim, percent,
+ err);
+ GF_OPTION_INIT ("soft-timeout", priv->soft_timeout, time, err);
+ GF_OPTION_INIT ("hard-timeout", priv->hard_timeout, time, err);
+ GF_OPTION_INIT ("alert-time", priv->log_timeout, time, err);
+ GF_OPTION_INIT ("volume-uuid", priv->volume_uuid, str, err);
+
+ this->local_pool = mem_pool_new (quota_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
goto err;
}
- GF_OPTION_INIT ("timeout", priv->timeout, int64, err);
+ if (priv->is_quota_on) {
+ priv->rpc_clnt = quota_enforcer_init (this, this->options);
+ if (priv->rpc_clnt == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota enforcer rpc init failed");
+ goto err;
+ }
+ }
ret = 0;
err:
return ret;
}
-
int
reconfigure (xlator_t *this, dict_t *options)
{
- int32_t ret = -1;
- quota_priv_t *priv = NULL;
- limits_t *limit = NULL;
- limits_t *next = NULL;
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ gf_boolean_t quota_on = _gf_false;
priv = this->private;
- list_for_each_entry_safe (limit, next, &priv->limit_head, limit_list) {
- list_del (&limit->limit_list);
-
- GF_FREE (limit);
- }
+ GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool,
+ out);
+ GF_OPTION_RECONF ("server-quota", quota_on, options, bool,
+ out);
+ GF_OPTION_RECONF ("default-soft-limit", priv->default_soft_lim,
+ options, percent, out);
+ GF_OPTION_RECONF ("alert-time", priv->log_timeout, options,
+ time, out);
+ GF_OPTION_RECONF ("soft-timeout", priv->soft_timeout, options,
+ time, out);
+ GF_OPTION_RECONF ("hard-timeout", priv->hard_timeout, options,
+ time, out);
+
+ if (quota_on) {
+ priv->rpc_clnt = quota_enforcer_init (this,
+ this->options);
+ if (priv->rpc_clnt == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota enforcer rpc init failed");
+ goto out;
+ }
- ret = quota_parse_limits (priv, this, options);
- if (ret == -1) {
- gf_log ("quota", GF_LOG_WARNING,
- "quota reconfigure failed, "
- "new changes will not take effect");
- goto out;
+ } else {
+ if (priv->rpc_clnt) {
+ // Quotad is shutdown when there is no started volume
+ // which has quota enabled. So, we should disable the
+ // enforcer client when quota is disabled on a volume,
+ // to avoid spurious reconnect attempts to a service
+ // (quotad), that is known to be down.
+ rpc_clnt_disable (priv->rpc_clnt);
+ }
}
- GF_OPTION_RECONF ("timeout", priv->timeout, options, int64, out);
+ priv->is_quota_on = quota_on;
ret = 0;
out:
return ret;
}
+int32_t
+quota_priv_dump (xlator_t *this)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+
+
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ gf_proc_dump_add_section ("xlators.features.quota.priv", this->name);
+
+ ret = TRY_LOCK (&priv->lock);
+ if (ret)
+ goto out;
+ else {
+ gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
+ gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
+ gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
+ gf_proc_dump_write("validation-count", "%ld",
+ priv->validation_count);
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ return 0;
+}
void
fini (xlator_t *this)
@@ -2925,40 +4269,105 @@ fini (xlator_t *this)
struct xlator_fops fops = {
- .statfs = quota_statfs,
- .lookup = quota_lookup,
- .writev = quota_writev,
- .create = quota_create,
- .mkdir = quota_mkdir,
- .truncate = quota_truncate,
- .ftruncate = quota_ftruncate,
- .unlink = quota_unlink,
- .symlink = quota_symlink,
- .link = quota_link,
- .rename = quota_rename,
- .getxattr = quota_getxattr,
- .fgetxattr = quota_fgetxattr,
- .stat = quota_stat,
- .fstat = quota_fstat,
- .readlink = quota_readlink,
- .readv = quota_readv,
- .fsync = quota_fsync,
- .setattr = quota_setattr,
- .fsetattr = quota_fsetattr,
- .mknod = quota_mknod
+ .statfs = quota_statfs,
+ .lookup = quota_lookup,
+ .writev = quota_writev,
+ .create = quota_create,
+ .mkdir = quota_mkdir,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .unlink = quota_unlink,
+ .symlink = quota_symlink,
+ .link = quota_link,
+ .rename = quota_rename,
+ .getxattr = quota_getxattr,
+ .fgetxattr = quota_fgetxattr,
+ .stat = quota_stat,
+ .fstat = quota_fstat,
+ .readlink = quota_readlink,
+ .readv = quota_readv,
+ .fsync = quota_fsync,
+ .setattr = quota_setattr,
+ .fsetattr = quota_fsetattr,
+ .mknod = quota_mknod,
+ .setxattr = quota_setxattr,
+ .fsetxattr = quota_fsetxattr,
+ .removexattr = quota_removexattr,
+ .fremovexattr = quota_fremovexattr,
+ .readdirp = quota_readdirp,
+ .fallocate = quota_fallocate,
};
struct xlator_cbks cbks = {
.forget = quota_forget
};
+struct xlator_dumpops dumpops = {
+ .priv = quota_priv_dump,
+};
struct volume_options options[] = {
{.key = {"limit-set"}},
- {.key = {"timeout"},
- .type = GF_OPTION_TYPE_SIZET,
- .default_value = "0",
- .description = "quota caches the directory sizes on client. Timeout "
- "indicates the timeout for the cache to be revalidated."
+ {.key = {"deem-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If set to on, it takes quota limits into"
+ "consideration while estimating fs size. (df command)"
+ " (Default is off)."
+ },
+ {.key = {"server-quota"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Skip the quota enforcement if the feature is"
+ " not turned on. This is not a user exposed option."
+ },
+ {.key = {"default-soft-limit"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "80%",
+ },
+ {.key = {"soft-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 1800,
+ .default_value = "60",
+ .description = "quota caches the directory sizes on client. "
+ "soft-timeout indicates the timeout for the validity of"
+ " cache before soft-limit has been crossed."
+ },
+ {.key = {"hard-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 60,
+ .default_value = "5",
+ .description = "quota caches the directory sizes on client. "
+ "hard-timeout indicates the timeout for the validity of"
+ " cache after soft-limit has been crossed."
+ },
+ { .key = {"username"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"password"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"transport-type"},
+ .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
+ "tcp/client", "ib-verbs/client", "rdma"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"remote-host"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS,
+ },
+ { .key = {"remote-port"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"volume-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "uuid of the volume this brick is part of."
+ },
+ { .key = {"alert-time"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 7*86400,
+ .default_value = "86400",
},
{.key = {NULL}}
};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 9b94d5920..84c3257fe 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -1,50 +1,58 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
+#ifndef _QUOTA_H
+#define _QUOTA_H
+
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
-#include "byte-order.h"
#include "common-utils.h"
#include "quota-mem-types.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "logging.h"
+#include "dict.h"
+#include "stack.h"
+#include "common-utils.h"
+#include "event.h"
+#include "globals.h"
+#include "rpcsvc.h"
+#include "rpc-clnt.h"
+#include "byte-order.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "xdr-generic.h"
+#include "compat-errno.h"
+#include "protocol-common.h"
-#define QUOTA_XATTR_PREFIX "trusted."
#define DIRTY "dirty"
#define SIZE "size"
#define CONTRIBUTION "contri"
#define VAL_LENGTH 8
#define READDIR_BUF 4096
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY (_frame->root); \
- quota_local_cleanup (_this, _local); \
- GF_FREE (_local); \
- } while (0)
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+#define WIND_IF_QUOTAOFF(is_quota_on, label) \
+ if (!is_quota_on) \
+ goto label;
+
+#define DID_REACH_LIMIT(lim, prev_size, cur_size) \
+ ((cur_size) >= (lim) && (prev_size) < (lim))
#define QUOTA_SAFE_INCREMENT(lock, var) \
do { \
@@ -60,19 +68,13 @@
UNLOCK (lock); \
} while (0)
-#define QUOTA_LOCAL_ALLOC_OR_GOTO(local, type, label) \
- do { \
- QUOTA_ALLOC_OR_GOTO (local, type, label); \
- LOCK_INIT (&local->lock); \
- } while (0)
-
#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
do { \
var = GF_CALLOC (sizeof (type), 1, \
gf_quota_mt_##type); \
if (!var) { \
gf_log ("", GF_LOG_ERROR, \
- "out of memory :("); \
+ "out of memory"); \
ret = -1; \
goto label; \
} \
@@ -89,7 +91,6 @@
} \
STACK_UNWIND_STRICT (fop, frame, params); \
quota_local_cleanup (_this, _local); \
- GF_FREE (_local); \
} while (0)
#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
@@ -101,11 +102,17 @@
#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \
do { \
char _gfid_unparsed[40]; \
- uuid_unparse (_gfid, _gfid_unparsed); \
- _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
- "%s.%s." CONTRIBUTION, \
- _vol_name, _gfid_unparsed); \
- } while (0)
+ if (_gfid != NULL) { \
+ uuid_unparse (_gfid, _gfid_unparsed); \
+ _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s.%s." CONTRIBUTION, \
+ _vol_name, _gfid_unparsed); \
+ } else { \
+ _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s.." CONTRIBUTION, \
+ _vol_name); \
+ } \
+ } while (0)
#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \
@@ -123,6 +130,11 @@
goto label; \
} while (0)
+#define QUOTA_REG_OR_LNK_FILE(ia_type) \
+ (IA_ISREG (ia_type) || IA_ISLNK (ia_type))
+
+
+
struct quota_dentry {
char *name;
uuid_t par;
@@ -132,44 +144,77 @@ typedef struct quota_dentry quota_dentry_t;
struct quota_inode_ctx {
int64_t size;
- int64_t limit;
+ int64_t hard_lim;
+ int64_t soft_lim;
struct iatt buf;
struct list_head parents;
struct timeval tv;
+ struct timeval prev_log;
gf_lock_t lock;
};
typedef struct quota_inode_ctx quota_inode_ctx_t;
+struct quota_limit {
+ int64_t hard_lim;
+ int64_t soft_lim_percent;
+} __attribute__ ((packed));
+typedef struct quota_limit quota_limit_t;
+
+typedef void
+(*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data);
+
struct quota_local {
- gf_lock_t lock;
- uint32_t validate_count;
- uint32_t link_count;
- loc_t loc;
- loc_t oldloc;
- loc_t newloc;
- loc_t validate_loc;
- int64_t delta;
- int32_t op_ret;
- int32_t op_errno;
- int64_t size;
- int64_t limit;
- char just_validated;
- inode_t *inode;
- call_stub_t *stub;
+ gf_lock_t lock;
+ uint32_t validate_count;
+ uint32_t link_count;
+ loc_t loc;
+ loc_t oldloc;
+ loc_t newloc;
+ loc_t validate_loc;
+ int64_t delta;
+ int32_t op_ret;
+ int32_t op_errno;
+ int64_t size;
+ gf_boolean_t skip_check;
+ char just_validated;
+ fop_lookup_cbk_t validate_cbk;
+ inode_t *inode;
+ call_stub_t *stub;
+ struct iobref *iobref;
+ quota_limit_t limit;
+ int64_t space_available;
+ quota_ancestry_built_t ancestry_cbk;
+ void *ancestry_data;
};
-typedef struct quota_local quota_local_t;
+typedef struct quota_local quota_local_t;
struct quota_priv {
- int64_t timeout;
- struct list_head limit_head;
+ uint32_t soft_timeout;
+ uint32_t hard_timeout;
+ uint32_t log_timeout;
+ double default_soft_lim;
+ gf_boolean_t is_quota_on;
+ gf_boolean_t consider_statfs;
+ gf_lock_t lock;
+ rpc_clnt_prog_t *quota_enforcer;
+ struct rpcsvc_program *quotad_aggregator;
+ struct rpc_clnt *rpc_clnt;
+ rpcsvc_t *rpcsvc;
+ inode_table_t *itable;
+ char *volume_uuid;
+ uint64_t validation_count;
};
-typedef struct quota_priv quota_priv_t;
+typedef struct quota_priv quota_priv_t;
-struct limits {
- struct list_head limit_list;
- char *path;
- int64_t value;
-};
-typedef struct limits limits_t;
+int
+quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata, fop_lookup_cbk_t cbk);
+struct rpc_clnt *
+quota_enforcer_init (xlator_t *this, dict_t *options);
+
+void
+quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+ int64_t delta);
-uint64_t cn = 1;
+#endif
diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c
new file mode 100644
index 000000000..f3f65ca2a
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.c
@@ -0,0 +1,423 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cli1-xdr.h"
+#include "quota.h"
+#include "quotad-helpers.h"
+#include "quotad-aggregator.h"
+
+struct rpcsvc_program quotad_aggregator_prog;
+
+struct iobuf *
+quotad_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = 0;
+ ssize_t xdr_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", req, ret);
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ if (arg && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, arg);
+ iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_log_callingfn (THIS->name, GF_LOG_ERROR,
+ "Failed to get iobuf");
+ goto ret;
+ };
+
+ iobuf_to_iovec (iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+
+ retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ /* Failed to Encode 'GlusterFS' msg in RPC is not exactly
+ failure of RPC return values.. client should get
+ notified about this, so there are no missing frames */
+ gf_log_callingfn ("", GF_LOG_ERROR, "Failed to encode message");
+ req->rpc_err = GARBAGE_ARGS;
+ retlen = 0;
+ }
+ }
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iobuf_unref (iob);
+ iob = NULL;
+ }
+
+ return iob;
+}
+
+int
+quotad_aggregator_submit_reply (call_frame_t *frame, rpcsvc_request_t *req,
+ void *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {0,};
+ quotad_aggregator_state_t *state = NULL;
+ char new_iobref = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", req, ret);
+
+ if (frame) {
+ state = frame->root->state;
+ frame->local = NULL;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto ret;
+ }
+
+ new_iobref = 1;
+ }
+
+ iob = quotad_serialize_reply (req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_log ("", GF_LOG_ERROR, "Failed to serialize reply");
+ goto ret;
+ }
+
+ iobref_add (iobref, iob);
+
+ ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
+ iobref);
+
+ iobuf_unref (iob);
+
+ ret = 0;
+ret:
+ if (state) {
+ quotad_aggregator_free_state (state);
+ }
+
+ if (frame) {
+ if (frame->root->client)
+ gf_client_unref (frame->root->client);
+
+ STACK_DESTROY (frame->root);
+ }
+
+ if (new_iobref) {
+ iobref_unref (iobref);
+ }
+
+ return ret;
+}
+
+int
+quotad_aggregator_getlimit_cbk (xlator_t *this, call_frame_t *frame,
+ void *lookup_rsp)
+{
+ gfs3_lookup_rsp *rsp = lookup_rsp;
+ gf_cli_rsp cli_rsp = {0,};
+ dict_t *xdata = NULL;
+ int ret = -1;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
+ (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), rsp->op_ret,
+ rsp->op_errno, out);
+
+ ret = 0;
+out:
+ rsp->op_ret = ret;
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to unserialize "
+ "nameless lookup rsp");
+ goto reply;
+ }
+ cli_rsp.op_ret = rsp->op_ret;
+ cli_rsp.op_errno = rsp->op_errno;
+ cli_rsp.op_errstr = "";
+ if (xdata) {
+ GF_PROTOCOL_DICT_SERIALIZE (frame->this, xdata,
+ (&cli_rsp.dict.dict_val),
+ (cli_rsp.dict.dict_len),
+ cli_rsp.op_errno, reply);
+ }
+
+reply:
+ quotad_aggregator_submit_reply (frame, frame->local, (void*)&cli_rsp, NULL, 0,
+ NULL, (xdrproc_t)xdr_gf_cli_rsp);
+
+ dict_unref (xdata);
+ GF_FREE (cli_rsp.dict.dict_val);
+ return 0;
+}
+
+int
+quotad_aggregator_getlimit (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ gf_cli_req cli_req = {{0}, };
+ gf_cli_rsp cli_rsp = {0};
+ gfs3_lookup_req args = {{0,},};
+ gfs3_lookup_rsp rsp = {0,};
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ int ret = -1, op_errno = 0;
+ char *gfid_str = NULL;
+ uuid_t gfid = {0};
+
+ GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err);
+
+ this = THIS;
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ gf_log ("", GF_LOG_ERROR, "xdr decoding error");
+ req->rpc_err = GARBAGE_ARGS;
+ goto err;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "unserialize req-buffer to dictionary");
+ goto err;
+ }
+ }
+
+ ret = dict_get_str (dict, "gfid", &gfid_str);
+ if (ret) {
+ goto err;
+ }
+
+ uuid_parse ((const char*)gfid_str, gfid);
+
+ frame = quotad_aggregator_get_frame_from_req (req);
+ if (frame == NULL) {
+ rsp.op_errno = ENOMEM;
+ goto err;
+ }
+ state = frame->root->state;
+ state->xdata = dict;
+ ret = dict_set_int32 (state->xdata, QUOTA_LIMIT_KEY, 42);
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32 (state->xdata, QUOTA_SIZE_KEY, 42);
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32 (state->xdata, GET_ANCESTRY_PATH_KEY,42);
+ if (ret)
+ goto err;
+
+ memcpy (&args.gfid, &gfid, 16);
+
+ args.bname = alloca (req->msg[0].iov_len);
+ args.xdata.xdata_val = alloca (req->msg[0].iov_len);
+
+ ret = qd_nameless_lookup (this, frame, &args, state->xdata,
+ quotad_aggregator_getlimit_cbk);
+ if (ret) {
+ rsp.op_errno = ret;
+ goto err;
+ }
+
+ return ret;
+
+err:
+ cli_rsp.op_ret = -1;
+ cli_rsp.op_errno = op_errno;
+ cli_rsp.op_errstr = "";
+
+ quotad_aggregator_getlimit_cbk (this, frame, &cli_rsp);
+ dict_unref (dict);
+
+ return ret;
+}
+
+int
+quotad_aggregator_lookup_cbk (xlator_t *this, call_frame_t *frame,
+ void *rsp)
+{
+ quotad_aggregator_submit_reply (frame, frame->local, rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lookup_rsp);
+
+ return 0;
+}
+
+
+int
+quotad_aggregator_lookup (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ gfs3_lookup_req args = {{0,},};
+ int ret = -1, op_errno = 0;
+ gfs3_lookup_rsp rsp = {0,};
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err);
+
+ this = THIS;
+
+ args.bname = alloca (req->msg[0].iov_len);
+ args.xdata.xdata_val = alloca (req->msg[0].iov_len);
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gfs3_lookup_req);
+ if (ret < 0) {
+ rsp.op_errno = EINVAL;
+ goto err;
+ }
+
+ frame = quotad_aggregator_get_frame_from_req (req);
+ if (frame == NULL) {
+ rsp.op_errno = ENOMEM;
+ goto err;
+ }
+
+ state = frame->root->state;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, state->xdata,
+ (args.xdata.xdata_val),
+ (args.xdata.xdata_len), ret,
+ op_errno, err);
+
+
+ ret = qd_nameless_lookup (this, frame, &args, state->xdata,
+ quotad_aggregator_lookup_cbk);
+ if (ret) {
+ rsp.op_errno = ret;
+ goto err;
+ }
+
+ return ret;
+
+err:
+ rsp.op_ret = -1;
+ rsp.op_errno = op_errno;
+
+ quotad_aggregator_lookup_cbk (this, frame, &rsp);
+ return ret;
+}
+
+int
+quotad_aggregator_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ if (!xl || !data) {
+ gf_log_callingfn ("server", GF_LOG_WARNING,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ return 0;
+}
+
+int
+quotad_aggregator_init (xlator_t *this)
+{
+ quota_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ ret = dict_set_str (this->options, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (this->options, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (this->options, "transport.socket.listen-path",
+ "/tmp/quotad.socket");
+ if (ret)
+ goto out;
+
+ /* RPC related */
+ priv->rpcsvc = rpcsvc_init (this, this->ctx, this->options, 0);
+ if (priv->rpcsvc == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "creation of rpcsvc failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpcsvc_create_listeners (priv->rpcsvc, this->options,
+ this->name);
+ if (ret < 1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "creation of listener failed");
+ ret = -1;
+ goto out;
+ }
+
+ priv->quotad_aggregator = &quotad_aggregator_prog;
+ quotad_aggregator_prog.options = this->options;
+
+ ret = rpcsvc_program_register (priv->rpcsvc, &quotad_aggregator_prog);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "registration of program (name:%s, prognum:%d, "
+ "progver:%d) failed", quotad_aggregator_prog.progname,
+ quotad_aggregator_prog.prognum,
+ quotad_aggregator_prog.progver);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+rpcsvc_actor_t quotad_aggregator_actors[] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0,
+ DRC_NA},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", GF_AGGREGATOR_NULL,
+ quotad_aggregator_lookup, NULL, 0, DRC_NA},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT,
+ quotad_aggregator_getlimit, NULL, 0},
+};
+
+
+struct rpcsvc_program quotad_aggregator_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numactors = GF_AGGREGATOR_MAXVALUE,
+ .actors = quotad_aggregator_actors
+};
diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h
new file mode 100644
index 000000000..5ddea5b3c
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTAD_AGGREGATOR_H
+#define _QUOTAD_AGGREGATOR_H
+
+#include "quota.h"
+#include "stack.h"
+#include "glusterfs3-xdr.h"
+#include "inode.h"
+
+typedef struct {
+ void *pool;
+ xlator_t *this;
+ xlator_t *active_subvol;
+ inode_table_t *itable;
+ loc_t loc;
+ dict_t *xdata;
+} quotad_aggregator_state_t;
+
+typedef int (*quotad_aggregator_lookup_cbk_t) (xlator_t *this,
+ call_frame_t *frame,
+ void *rsp);
+int
+qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
+ dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk);
+int
+quotad_aggregator_init (xlator_t *this);
+
+#endif
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
new file mode 100644
index 000000000..fd3099114
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "quotad-helpers.h"
+
+quotad_aggregator_state_t *
+get_quotad_aggregator_state (xlator_t *this, rpcsvc_request_t *req)
+{
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *active_subvol = NULL;
+ quota_priv_t *priv = NULL;
+
+ state = (void *)GF_CALLOC (1, sizeof (*state),
+ gf_quota_mt_aggregator_state_t);
+ if (!state)
+ return NULL;
+
+ state->this = THIS;
+ priv = this->private;
+
+ LOCK (&priv->lock);
+ {
+ active_subvol = state->active_subvol = FIRST_CHILD (this);
+ }
+ UNLOCK (&priv->lock);
+
+ if (active_subvol->itable == NULL)
+ active_subvol->itable = inode_table_new (4096, active_subvol);
+
+ state->itable = active_subvol->itable;
+
+ state->pool = this->ctx->pool;
+
+ return state;
+}
+
+void
+quotad_aggregator_free_state (quotad_aggregator_state_t *state)
+{
+ if (state->xdata)
+ dict_unref (state->xdata);
+
+ GF_FREE (state);
+}
+
+call_frame_t *
+quotad_aggregator_alloc_frame (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+ GF_VALIDATE_OR_GOTO ("server", req->trans, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc->ctx, out);
+
+ this = req->svc->mydata;
+
+ frame = create_frame (this, req->svc->ctx->pool);
+ if (!frame)
+ goto out;
+
+ state = get_quotad_aggregator_state (this, req);
+ if (!state)
+ goto out;
+
+ frame->root->state = state;
+ frame->root->unique = 0;
+
+ frame->this = this;
+out:
+ return frame;
+}
+
+call_frame_t *
+quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+
+ frame = quotad_aggregator_alloc_frame (req);
+ if (!frame)
+ goto out;
+
+ client = req->trans->xl_private;
+
+ frame->root->op = req->procnum;
+
+ frame->root->unique = req->xid;
+
+ frame->root->uid = req->uid;
+ frame->root->gid = req->gid;
+ frame->root->pid = req->pid;
+
+ gf_client_ref (client);
+ frame->root->client = client;
+
+ frame->root->lk_owner = req->lk_owner;
+
+ frame->local = req;
+out:
+ return frame;
+}
diff --git a/xlators/features/quota/src/quotad-helpers.h b/xlators/features/quota/src/quotad-helpers.h
new file mode 100644
index 000000000..a10fb7fa8
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef QUOTAD_HELPERS_H
+#define QUOTAD_HELPERS_H
+
+#include "rpcsvc.h"
+#include "quota.h"
+#include "quotad-aggregator.h"
+
+void
+quotad_aggregator_free_state (quotad_aggregator_state_t *state);
+
+call_frame_t *
+quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req);
+
+#endif
diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c
new file mode 100644
index 000000000..243b943e9
--- /dev/null
+++ b/xlators/features/quota/src/quotad.c
@@ -0,0 +1,210 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "quota.h"
+#include "quotad-aggregator.h"
+#include "common-utils.h"
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1);
+
+ if (0 != ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting "
+ "init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+qd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ quotad_aggregator_lookup_cbk_t lookup_cbk = NULL;
+ gfs3_lookup_rsp rsp = {0, };
+
+ lookup_cbk = cookie;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+ rsp.xdata.xdata_len, rsp.op_errno, out);
+
+ gf_stat_from_iatt (&rsp.stat, buf);
+
+out:
+ lookup_cbk (this, frame, &rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ inode_unref (inode);
+
+ return 0;
+}
+
+xlator_t *
+qd_find_subvol (xlator_t *this, char *volume_uuid)
+{
+ xlator_list_t *child = NULL;
+ xlator_t *subvol = NULL;
+ char key[1024];
+ char *optstr = NULL;
+
+ if (!this || !volume_uuid)
+ goto out;
+
+ for (child = this->children; child; child = child->next) {
+ snprintf(key, 1024, "%s.volume-id", child->xlator->name);
+ if (dict_get_str(this->options, key, &optstr) < 0)
+ continue;
+
+ if (strcmp (optstr, volume_uuid) == 0) {
+ subvol = child->xlator;
+ break;
+ }
+ }
+
+out:
+ return subvol;
+}
+
+int
+qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
+ dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk)
+{
+ gfs3_lookup_rsp rsp = {0, };
+ int op_errno = 0, ret = -1;
+ loc_t loc = {0, };
+ quotad_aggregator_state_t *state = NULL;
+ quota_priv_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ char *volume_uuid = NULL;
+
+ priv = this->private;
+ state = frame->root->state;
+
+ frame->root->op = GF_FOP_LOOKUP;
+
+ loc.inode = inode_new (state->itable);
+ if (loc.inode == NULL) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy (loc.gfid, req->gfid, 16);
+
+ ret = dict_get_str (xdata, "volume-uuid", &volume_uuid);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ subvol = qd_find_subvol (this, volume_uuid);
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE (frame, qd_lookup_cbk, lookup_cbk, subvol,
+ subvol->fops->lookup, &loc, xdata);
+ return 0;
+
+out:
+ rsp.op_ret = -1;
+ rsp.op_errno = op_errno;
+
+ lookup_cbk (this, frame, &rsp);
+
+ inode_unref (loc.inode);
+ return 0;
+}
+
+int
+qd_reconfigure (xlator_t *this, dict_t *options)
+{
+ /* As of now quotad is restarted upon alteration of volfile */
+ return 0;
+}
+
+void
+qd_fini (xlator_t *this)
+{
+ return;
+}
+
+int32_t
+qd_init (xlator_t *this)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+
+ if (NULL == this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: quota (%s) not configured for min of 1 child",
+ this->name);
+ ret = -1;
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err);
+ LOCK_INIT (&priv->lock);
+
+ this->private = priv;
+
+ ret = quotad_aggregator_init (this);
+ if (ret < 0)
+ goto err;
+
+ ret = 0;
+err:
+ if (ret) {
+ GF_FREE (priv);
+ }
+ return ret;
+}
+
+class_methods_t class_methods = {
+ .init = qd_init,
+ .fini = qd_fini,
+ .reconfigure = qd_reconfigure,
+};
+
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {"transport-type"},
+ .value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs",
+ "unix", "ib-sdp", "tcp/server", "ib-verbs/server", "rdma",
+ "rdma*([ \t]),*([ \t])socket",
+ "rdma*([ \t]),*([ \t])tcp",
+ "tcp*([ \t]),*([ \t])rdma",
+ "socket*([ \t]),*([ \t])rdma"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"transport.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ {.key = {NULL}}
+};
diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am
index 31ae4f340..4c1462137 100644
--- a/xlators/features/read-only/src/Makefile.am
+++ b/xlators/features/read-only/src/Makefile.am
@@ -4,18 +4,19 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
noinst_HEADERS = read-only-common.h
-read_only_la_LDFLAGS = -module -avoidversion
+read_only_la_LDFLAGS = -module -avoid-version
read_only_la_SOURCES = read-only.c read-only-common.c
read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-worm_la_LDFLAGS = -module -avoidversion
+worm_la_LDFLAGS = -module -avoid-version
worm_la_SOURCES = read-only-common.c worm.c
worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
index 5e4949ee0..56a7a7176 100644
--- a/xlators/features/read-only/src/read-only-common.c
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -27,220 +17,223 @@
int32_t
ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
+ entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type)
+ fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock)
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock)
+ struct gf_flock *flock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL, xdata);
return 0;
}
int32_t
ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int
ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params)
+ mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int
ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL,
+ NULL, xdata);
return 0;
}
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
- NULL);
+ NULL, xdata);
return 0;
}
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
return 0;
}
int32_t
ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, xdata);
return 0;
}
static int32_t
ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int32_t
ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
if (((flags & O_ACCMODE) == O_WRONLY) ||
((flags & O_ACCMODE) == O_RDWR)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata);
return 0;
}
STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
}
int32_t
ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata);
return 0;
}
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, xdata);
return 0;
}
int32_t
ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata);
return 0;
}
int32_t
ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS);
+ STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata);
return 0;
}
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
index 3bc008e59..5d4c7e260 100644
--- a/xlators/features/read-only/src/read-only-common.h
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -27,97 +17,99 @@
int32_t
ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict);
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict);
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type);
+ entrylk_type type, dict_t *xdata);
int32_t
ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type
- type);
+ type, dict_t *xdata);
int32_t
ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock);
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock);
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock);
+ struct gf_flock *flock, dict_t *xdata);
int32_t
ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t
ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid);
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset);
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata);
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset);
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata);
int
ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, dict_t *params);
+ dev_t rdev, mode_t umask, dict_t *xdata);
int
ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dict_t *params);
+ mode_t umask, dict_t *xdata);
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc);
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags);
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int
ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, dict_t *params);
+ loc_t *loc, mode_t umask, dict_t *xdata);
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc);
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc);
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd, dict_t *params);
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags);
+ fd_t *fd, dict_t *xdata);
int32_t
ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags);
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata);
int32_t
ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref);
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t
ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index b11e84f24..e49e54a1b 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
index 790bc3234..16c3eb3da 100644
--- a/xlators/features/read-only/src/worm.c
+++ b/xlators/features/read-only/src/worm.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -28,25 +18,25 @@
static int32_t
worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int32_t
worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, int32_t wbflags)
+ fd_t *fd, dict_t *xdata)
{
if ((((flags & O_ACCMODE) == O_WRONLY) ||
((flags & O_ACCMODE) == O_RDWR)) &&
!(flags & O_APPEND)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL);
return 0;
}
STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags);
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
}
@@ -75,7 +65,6 @@ fini (xlator_t *this)
}
struct xlator_fops fops = {
-
.open = worm_open,
.unlink = ro_unlink,
@@ -92,8 +81,7 @@ struct xlator_fops fops = {
.lk = ro_lk,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {NULL} },
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
index 4671d06d3..5251eb082 100644
--- a/xlators/features/trash/src/Makefile.am
+++ b/xlators/features/trash/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = trash.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-trash_la_LDFLAGS = -module -avoidversion
+trash_la_LDFLAGS = -module -avoid-version
trash_la_SOURCES = trash.c
trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = trash.h trash-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index 6608abf6a..0e6ef572f 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -1,30 +1,19 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
#include "mem-types.h"
enum gf_trash_mem_types_ {
- gf_trash_mt_trash_local_t = gf_common_mt_end + 1,
- gf_trash_mt_trash_private_t,
+ gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
gf_trash_mt_char,
gf_trash_mt_trash_elim_pattern_t,
gf_trash_mt_end
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index 42ee79a96..addeb66a0 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -63,7 +53,7 @@ trash_local_wipe (trash_local_t *local)
if (local->newfd)
fd_unref (local->newfd);
- GF_FREE (local);
+ mem_put (local);
out:
return;
}
@@ -170,8 +160,7 @@ trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie);
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -442,8 +431,7 @@ trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -504,9 +492,7 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
trash_elim_pattern_t *trav = NULL;
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int32_t match = 0;
priv = this->private;
@@ -533,8 +519,7 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
TRASH_STACK_UNWIND (rename, frame, -1, ENOMEM,
@@ -554,9 +539,8 @@ trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
{
/* append timestamp to file name */
/* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_ftm (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
@@ -575,9 +559,7 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
trash_elim_pattern_t *trav = NULL;
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int32_t match = 0;
priv = this->private;
@@ -610,8 +592,7 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL);
@@ -627,9 +608,8 @@ trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
/* append timestamp to file name */
/* TODO: can we make it optional? */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
@@ -690,7 +670,7 @@ trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->fsize = stbuf->ia_size;
STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, iobuf);
+ local->newfd, vector, count, local->cur_offset, 0, iobuf);
out:
return 0;
@@ -723,7 +703,7 @@ trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
+ local->cur_offset, 0);
goto out;
}
@@ -763,7 +743,7 @@ trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_truncate_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
+ local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
out:
return 0;
@@ -932,8 +912,7 @@ trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -945,10 +924,8 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
+ char timestr[64] = {0,};
char loc_newname[PATH_MAX] = {0,};
- time_t utime = 0;
int32_t flags = 0;
priv = this->private;
@@ -980,9 +957,8 @@ trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strcat (local->newpath, local->loc.path);
{
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
strcat (local->newpath, timestr);
}
strcpy (loc_newname,local->loc.name);
@@ -1044,8 +1020,7 @@ trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
LOCK_INIT (&frame->lock);
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
@@ -1110,7 +1085,7 @@ trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_readv_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset);
+ local->cur_offset, 0);
return 0;
}
@@ -1142,7 +1117,7 @@ trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_writev_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, NULL);
+ local->newfd, vector, count, local->cur_offset, 0, NULL);
return 0;
}
@@ -1194,7 +1169,7 @@ trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, local->fd,
- (size_t)GF_BLOCK_READV_SIZE, local->cur_offset);
+ (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
return 0;
}
@@ -1299,8 +1274,7 @@ trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
out:
GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ GF_FREE (tmp_str);
return 0;
}
@@ -1349,11 +1323,9 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
trash_private_t *priv = NULL;
trash_local_t *local = NULL;
dentry_t *dir_entry = NULL;
- struct tm *tm = NULL;
char *pathbuf = NULL;
inode_t *newinode = NULL;
- time_t utime = 0;
- char timestr[256];
+ char timestr[64];
int32_t retval = 0;
int32_t match = 0;
@@ -1385,18 +1357,14 @@ trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
return 0;
}
- local = GF_CALLOC (1, sizeof (trash_local_t),
- gf_trash_mt_trash_local_t);
+ local = mem_get0 (this->local_pool);
if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "out of memory");
TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y-%m-%d-%H%M%S", tm);
-
+ gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_F_HMS);
strcpy (local->newpath, priv->trash_dir);
strcat (local->newpath, pathbuf);
strcat (local->newpath, timestr);
@@ -1522,6 +1490,14 @@ init (xlator_t *this)
_priv->max_trash_file_size);
}
+ this->local_pool = mem_pool_new (trash_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ return -1;
+ }
+
+
this->private = (void *)_priv;
return 0;
}
@@ -1532,8 +1508,7 @@ fini (xlator_t *this)
trash_private_t *priv = NULL;
priv = this->private;
- if (priv)
- GF_FREE (priv);
+ GF_FREE (priv);
return;
}
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index d385ee346..9a7c03361 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_H__
#define __TRASH_H__
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index df302d11d..4e680c510 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -1,7 +1,44 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "mem-types.h"
#include "libxlator.h"
+int marker_xtime_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = -1,
+ [MCNT_ENODATA] = -1,
+ [MCNT_ENOTCONN] = -1,
+ [MCNT_ENOENT] = -1,
+ [MCNT_EOTHER] = -1,
+};
+
+int marker_uuid_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = 0,
+ [MCNT_ENODATA] = 0,
+ [MCNT_ENOTCONN] = 0,
+ [MCNT_ENOENT] = 0,
+ [MCNT_EOTHER] = 0,
+};
+
+static int marker_idx_errno_map[] = {
+ [MCNT_FOUND] = EINVAL,
+ [MCNT_NOTFOUND] = EINVAL,
+ [MCNT_ENOENT] = ENOENT,
+ [MCNT_ENOTCONN] = ENOTCONN,
+ [MCNT_ENODATA] = ENODATA,
+ [MCNT_EOTHER] = EINVAL,
+ [MCNT_MAX] = 0,
+};
+
/*Copy the contents of oldtimebuf to newtimbuf*/
static void
update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf)
@@ -35,139 +72,148 @@ match_uuid_local (const char *name, char *uuid)
return 0;
}
+static void
+marker_local_incr_errcount (xl_marker_local_t *local, int op_errno)
+{
+ marker_result_idx_t i = -1;
+ if (!local)
+ return;
+
+ switch (op_errno) {
+ case ENODATA:
+ i = MCNT_ENODATA;
+ break;
+ case ENOENT:
+ i = MCNT_ENOENT;
+ break;
+ case ENOTCONN:
+ i = MCNT_ENOTCONN;
+ break;
+ default:
+ i = MCNT_EOTHER;
+ break;
+ }
+ local->count[i]++;
+}
+
+static int
+evaluate_marker_results (int *gauge, int *count)
+{
+ int i = 0;
+ int op_errno = 0;
+ gf_boolean_t sane = _gf_true;
+
+ /* check if the policy of the gauge is violated;
+ * if yes, try to get the best errno, ie. look
+ * for the first position where there is a more
+ * specific kind of vioilation than the generic EINVAL
+ */
+ for (i = 0; i < MCNT_MAX; i++) {
+ if (sane) {
+ if ((gauge[i] > 0 && count[i] < gauge[i]) ||
+ (gauge[i] < 0 && count[i] >= -gauge[i])) {
+ sane = _gf_false;
+ /* generic action: adopt corresponding errno */
+ op_errno = marker_idx_errno_map[i];
+ }
+ } else {
+ /* already insane; trying to get a more informative
+ * errno by checking subsequent counters
+ */
+ if (count[i] > 0)
+ op_errno = marker_idx_errno_map[i];
+ }
+ if (op_errno && op_errno != EINVAL)
+ break;
+ }
+
+ return op_errno;
+}
/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/
int32_t
cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- int32_t callcnt = 0;
- int ret = -1;
- uint32_t *net_timebuf = NULL;
- uint32_t host_timebuf[2] = {0,};
- char *marker_xattr = NULL;
- struct marker_str *local = NULL;
- char *vol_uuid = NULL;
+ int32_t callcnt = 0;
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ char *marker_xattr = NULL;
+ xl_marker_local_t *local = NULL;
+ char *vol_uuid = NULL;
+ char need_unwind = 0;
if (!this || !frame || !frame->local || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
local = frame->local;
if (!local || !local->vol_uuid) {
gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- if (local->esomerr) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- goto done;
- }
-
- vol_uuid = local->vol_uuid;
-
- if (op_ret && op_errno == ENODATA) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enodata_count++;
- }
- goto done;
- }
-
- if (op_ret && op_errno == ENOENT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enoent_count++;
- }
- goto done;
- }
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- if (op_ret && op_errno == ENOTCONN) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->enotconn_count++;
- }
- goto done;
- }
+ vol_uuid = local->vol_uuid;
- if (op_ret) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- local->esomerr = op_errno;
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
}
- goto done;
- }
-
-
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (!gf_asprintf (& marker_xattr, "%s.%s.%s",
+ if (!gf_asprintf (&marker_xattr, "%s.%s.%s",
MARKER_XATTR_PREFIX, vol_uuid, XTIME)) {
op_errno = ENOMEM;
- goto done;
+ goto unlock;
}
if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) {
gf_log (this->name, GF_LOG_WARNING,
"Unable to get <uuid>.xtime attr");
- local->noxtime_count++;
- goto done;
+ local->count[MCNT_NOTFOUND]++;
+ goto unlock;
}
- if (local->has_xtime) {
-
+ if (local->count[MCNT_FOUND]) {
get_hosttime (net_timebuf, host_timebuf);
if ( (host_timebuf[0]>local->host_timebuf[0]) ||
(host_timebuf[0] == local->host_timebuf[0] &&
host_timebuf[1] >= local->host_timebuf[1])) {
-
update_timebuf (net_timebuf, local->net_timebuf);
update_timebuf (host_timebuf, local->host_timebuf);
-
}
- }
- else {
+ } else {
get_hosttime (net_timebuf, local->host_timebuf);
update_timebuf (net_timebuf, local->net_timebuf);
- local->has_xtime = _gf_true;
+ local->count[MCNT_FOUND]++;
}
-
-
}
-done:
+unlock:
UNLOCK (&frame->lock);
if (!callcnt) {
-
op_ret = 0;
op_errno = 0;
- if (local->has_xtime) {
- if (!dict) {
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
dict = dict_new();
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
+
ret = dict_set_static_bin (dict, marker_xattr,
(void *)local->net_timebuf, 8);
if (ret) {
@@ -175,178 +221,135 @@ done:
op_errno = ENOMEM;
goto out;
}
- goto out;
}
- if (local->noxtime_count)
- goto out;
-
- if (local->enodata_count) {
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
op_ret = -1;
- op_errno = ENODATA;
- goto out;
- }
- if (local->enotconn_count) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
- if (local->enoent_count) {
- op_ret = -1;
- op_errno = ENOENT;
- goto out;
- }
- else {
- op_errno = local->esomerr;
- goto out;
- }
-out:
- if (local->xl_specf_unwind) {
- frame->local = local->xl_local;
- local->xl_specf_unwind (frame, op_ret,
- op_errno, dict);
- return 0;
- }
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ }
+out:
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ GF_FREE (local);
+ } else if (need_unwind) {
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
}
+ GF_FREE (marker_xattr);
return 0;
}
int32_t
cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict)
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- int32_t callcnt = 0;
- data_t *data = NULL;
- struct volume_mark *volmark = NULL;
- struct marker_str *marker = NULL;
- char *vol_uuid;
+ int32_t callcnt = 0;
+ struct volume_mark *volmark = NULL;
+ xl_marker_local_t *local = NULL;
+ int32_t ret = -1;
+ char need_unwind = 0;
+ char *vol_uuid = NULL;
if (!this || !frame || !cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- marker = frame->local;
+ local = frame->local;
- if (!marker) {
+ if (!local) {
gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
goto out;
}
- vol_uuid = marker->vol_uuid;
-
- if (op_ret && (ENOENT == op_errno)) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- marker->enoent_count++;
- }
- goto done;
- }
-
- if (op_ret && (ENOTCONN == op_errno)) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- marker->enotconn_count++;
- }
- goto done;
- }
-
- if (!(data = dict_get (dict, GF_XATTR_MARKER_KEY))) {
- LOCK (&frame->lock);
- {
- callcnt = --marker->call_count;
- }
- goto done;
- }
-
- volmark = (struct volume_mark *)data->data;
-
LOCK (&frame->lock);
{
- callcnt = --marker->call_count;
+ callcnt = --local->call_count;
+ vol_uuid = local->vol_uuid;
+
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
+ }
- if (marker_has_volinfo (marker)) {
+ ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY,
+ (void *)&volmark);
+ if (ret)
+ goto unlock;
- if ((marker->volmark->major != volmark->major) ||
- (marker->volmark->minor != volmark->minor)) {
+ if (local->count[MCNT_FOUND]) {
+ if ((local->volmark->major != volmark->major) ||
+ (local->volmark->minor != volmark->minor)) {
op_ret = -1;
op_errno = EINVAL;
- goto done;
- }
- else if (volmark->retval) {
- data_unref ((data_t *) marker->volmark);
- marker->volmark = volmark;
- callcnt = 0;
+ goto unlock;
}
- else if ( (volmark->sec > marker->volmark->sec) ||
- ((volmark->sec == marker->volmark->sec)
- && (volmark->usec >= marker->volmark->usec))) {
- GF_FREE (marker->volmark);
- marker->volmark = memdup (volmark, sizeof (struct volume_mark));
- VALIDATE_OR_GOTO (marker->volmark, done);
+ if (local->retval)
+ goto unlock;
+ else if (volmark->retval) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
+ local->retval = volmark->retval;
+ } else if ((volmark->sec > local->volmark->sec) ||
+ ((volmark->sec == local->volmark->sec) &&
+ (volmark->usec >= local->volmark->usec))) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
}
} else {
- marker->volmark = memdup (volmark, sizeof (struct volume_mark));
- VALIDATE_OR_GOTO (marker->volmark, done);
-
+ local->volmark = memdup (volmark, sizeof (*volmark));
+ VALIDATE_OR_GOTO (local->volmark, unlock);
uuid_unparse (volmark->uuid, vol_uuid);
if (volmark->retval)
- callcnt = 0;
+ local->retval = volmark->retval;
+ local->count[MCNT_FOUND]++;
}
}
-done:
+unlock:
UNLOCK (&frame->lock);
if (!callcnt) {
op_ret = 0;
op_errno = 0;
- if (marker_has_volinfo (marker)) {
- if (!dict) {
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
dict = dict_new();
- if (!dict) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
+
if (dict_set_bin (dict, GF_XATTR_MARKER_KEY,
- marker->volmark,
+ local->volmark,
sizeof (struct volume_mark))) {
op_ret = -1;
op_errno = ENOMEM;
}
- goto out;
- }
- if (marker->enotconn_count) {
- op_ret = -1;
- op_errno = ENOTCONN;
- goto out;
}
- if (marker->enoent_count) {
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
op_ret = -1;
- op_errno = ENOENT;
- }
- else {
- op_ret = -1;
- op_errno = EINVAL;
- }
+ }
out:
- if (marker->xl_specf_unwind) {
- frame->local = marker->xl_local;
- marker->xl_specf_unwind (frame, op_ret,
- op_errno, dict);
- return 0;
- }
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ GF_FREE (local);
+ return 0;
+ } else if (need_unwind){
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
}
return 0;
}
@@ -357,10 +360,10 @@ cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
const char *name, void *xl_local,
xlator_specf_unwind_t xl_specf_getxattr_unwind,
xlator_t **sub_volumes, int count, int type,
- char *vol_uuid)
+ int *gauge, char *vol_uuid)
{
- int i;
- struct marker_str *local;
+ int i = 0;
+ xl_marker_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -373,33 +376,36 @@ cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
local = GF_CALLOC (sizeof (struct marker_str), 1,
gf_common_mt_libxl_marker_local);
- local->xl_local = xl_local;
- frame->local = local;
+ if (!local)
+ goto err;
+ local->xl_local = xl_local;
local->call_count = count;
-
local->xl_specf_unwind = xl_specf_getxattr_unwind;
-
local->vol_uuid = vol_uuid;
+ memcpy (local->gauge, gauge, sizeof (local->gauge));
+
+ frame->local = local;
for (i=0; i < count; i++) {
if (MARKER_UUID_TYPE == type)
STACK_WIND (frame, cluster_markeruuid_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
else if (MARKER_XTIME_TYPE == type)
STACK_WIND (frame, cluster_markerxtime_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
else {
gf_log (this->name, GF_LOG_WARNING,
- "Unrecognized type of marker attr received");
+ "Unrecognized type (%d) of marker attr "
+ "received", type);
STACK_WIND (frame, default_getxattr_cbk,
*(sub_volumes + i),
(*(sub_volumes + i))->fops->getxattr,
- loc, name);
+ loc, name, NULL);
break;
}
}
@@ -409,3 +415,113 @@ err:
return -1;
}
+
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ uint32_t host_value_timebuf[2] = {0,};
+
+ /* stime should be minimum of all the other nodes */
+ ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin (value);
+ if (!value_timebuf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime (value_timebuf, host_value_timebuf);
+ get_hosttime (net_timebuf, host_timebuf);
+
+ /* can't use 'min()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] < host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] < host_timebuf[1]))) {
+ update_timebuf (value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE (net_timebuf);
+
+ return ret;
+}
+
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ uint32_t host_value_timebuf[2] = {0,};
+
+ /* stime should be maximum of all the other nodes */
+ ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin (value);
+ if (!value_timebuf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime (value_timebuf, host_value_timebuf);
+ get_hosttime (net_timebuf, host_timebuf);
+
+ /* can't use 'max()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] > host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] > host_timebuf[1]))) {
+ update_timebuf (value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE (net_timebuf);
+
+ return ret;
+}
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index af7eb434a..175d3141d 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -1,3 +1,12 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _LIBXLATOR_H
#define _LIBXLATOR_H
@@ -23,10 +32,12 @@
#define MARKER_UUID_TYPE 1
#define MARKER_XTIME_TYPE 2
#define GF_XATTR_QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
+#define GF_XATTR_QUOTA_LIMIT_LIST "trusted.limit.list"
typedef int32_t (*xlator_specf_unwind_t) (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata);
struct volume_mark {
@@ -38,6 +49,69 @@ struct volume_mark {
uint32_t usec;
}__attribute__ ((__packed__));
+
+/*
+ * The enumerated type here
+ * is used to index two kind
+ * of integer arrays:
+ * - gauges
+ * - counters
+
+ * A counter is used internally,
+ * in getxattr callbacks, to count
+ * the results, categorized as
+ * the enum names suggest. So values
+ * in the counter are always non-negative.
+
+ * Gauges are part of the API.
+ * The caller passes one to the
+ * top-level aggregator function,
+ * cluster_getmarkerattr(). The gauge
+ * defines an evaluation policy for the
+ * counter. That is, at the
+ * end of the aggregation process
+ * the gauge is matched against the
+ * counter, and the policy
+ * represented by the gauge decides
+ * whether to return with success or failure,
+ * and in latter case, what particular failure
+ * case (errno).
+
+ * The rules are the following: for some index i,
+ * - if gauge[i] == 0, no requirement is set
+ * against counter[i];
+ * - if gauge[i] > 0, counter[i] >= gauge[i]
+ * is required;
+ * - if gauge[i] < 0, counter[i] < |gauge[i]|
+ * is required.
+
+ * If the requirement is not met, then i is mapped
+ * to the respective errno (MCNT_ENOENT -> ENOENT),
+ * or in lack of that, EINVAL.
+
+ * Cf. evaluate_marker_results() and marker_idx_errno_map[]
+ * in libxlator.c
+
+ * We provide two default gauges, one inteded for xtime
+ * aggregation, other for volume mark aggregation. The
+ * policies they represent agree with the hard-coded
+ * one prior to gauges. Cf. marker_xtime_default_gauge
+ * and marker_uuid_default_gauge in libxlator.c
+ */
+
+typedef enum {
+ MCNT_FOUND,
+ MCNT_NOTFOUND,
+ MCNT_ENODATA,
+ MCNT_ENOTCONN,
+ MCNT_ENOENT,
+ MCNT_EOTHER,
+ MCNT_MAX
+} marker_result_idx_t;
+
+extern int marker_xtime_default_gauge[];
+extern int marker_uuid_default_gauge[];
+
struct marker_str {
struct volume_mark *volmark;
data_t *data;
@@ -45,47 +119,39 @@ struct marker_str {
uint32_t host_timebuf[2];
uint32_t net_timebuf[2];
int32_t call_count;
- unsigned has_xtime:1;
- int32_t enoent_count;
- int32_t enotconn_count;
- int32_t enodata_count;
- int32_t noxtime_count;
-
- int esomerr;
+ int gauge[MCNT_MAX];
+ int count[MCNT_MAX];
xlator_specf_unwind_t xl_specf_unwind;
void *xl_local;
char *vol_uuid;
+ uint8_t retval;
};
-static inline gf_boolean_t
-marker_has_volinfo (struct marker_str *marker)
-{
- if (marker->volmark)
- return _gf_true;
- else
- return _gf_false;
-}
+typedef struct marker_str xl_marker_local_t;
int32_t
cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int32_t
cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict);
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int32_t
cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
const char *name, void *xl_local,
xlator_specf_unwind_t xl_specf_getxattr_unwind,
xlator_t **sub_volumes, int count, int type,
- char *vol_uuid);
+ int *gauge, char *vol_uuid);
int
match_uuid_local (const char *name, char *uuid);
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
-
+int
+gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
#endif /* !_LIBXLATOR_H */
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
index 385ff553f..f8fa7d4cb 100644
--- a/xlators/meta/src/Makefile.am
+++ b/xlators/meta/src/Makefile.am
@@ -4,7 +4,8 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
meta_so_SOURCES = meta.c tree.c misc.c view.c
noinst_HEADERS = meta.h tree.h misc.h view.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall
CLEANFILES =
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index 9585b7838..62028b246 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index 412b4a2b5..e69719f3c 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 5636487c9..73e0e50db 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_H__
#define __META_H__
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
index bea07e70c..1a8dfa806 100644
--- a/xlators/meta/src/misc.c
+++ b/xlators/meta/src/misc.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <unistd.h>
#include <sys/uio.h>
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
index f934a6d8d..30dd10e34 100644
--- a/xlators/meta/src/misc.h
+++ b/xlators/meta/src/misc.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MISC_H__
#define __MISC_H__
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
index cad5cbd71..dacbd665a 100644
--- a/xlators/meta/src/tree.c
+++ b/xlators/meta/src/tree.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -123,7 +113,7 @@ lookup_meta_entry (meta_dirent_t *root, const char *path,
gf_asprintf (remain, "/%s/%s", *remain, piece);
else
gf_asprintf (remain, "/%s", piece);
- if (tmp) GF_FREE (tmp);
+ GF_FREE (tmp);
piece = strtok (NULL, "/");
}
}
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
index 8157148db..985df3bd7 100644
--- a/xlators/meta/src/tree.h
+++ b/xlators/meta/src/tree.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TREE_H__
#define __TREE_H__
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
index ba3c30e0b..b4e2d64a2 100644
--- a/xlators/meta/src/view.c
+++ b/xlators/meta/src/view.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
index 440c0d34d..2eff6126e 100644
--- a/xlators/meta/src/view.h
+++ b/xlators/meta/src/view.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __VIEW_H__
#define __VIEW_H__
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index e1495c273..b109e6dff 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -1,26 +1,48 @@
xlator_LTLIBRARIES = glusterd.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
-glusterd_la_LDFLAGS = -module -avoidversion $(LIBXML2_LIBS)
-glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c glusterd-op-sm.c \
- glusterd-utils.c glusterd-rpc-ops.c glusterd-store.c glusterd-handshake.c \
- glusterd-pmap.c glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \
+glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
+glusterd_la_LDFLAGS = -module -avoid-version
+if ENABLE_BD_XLATOR
+glusterd_la_LDFLAGS += -llvm2app
+endif
+glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \
glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \
- glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c
+ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
+ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
+ glusterd-locks.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS) -lcrypto
-noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h glusterd-sm.h \
- glusterd-store.h glusterd-mem-types.h glusterd-pmap.h glusterd-volgen.h \
- glusterd-mountbroker.h
+noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
+ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \
+ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\
- -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/xdr/src\
- -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/uuid -I$(top_srcdir)/contrib/md5 -DSBIN_DIR=\"$(sbindir)\"\
- -DDATADIR=\"$(localstatedir)\" -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(LIBXML2_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/uuid \
+ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir)
CLEANFILES =
+
+install-data-hook:
+
+if GF_INSTALL_VAR_LIB_GLUSTERD
+ $(mkdir_p) $(localstatedir)/lib/
+ (stat $(sysconfdir)/glusterd && \
+ mv $(sysconfdir)/glusterd $(localstatedir)/lib/) || true;
+ (ln -sf $(localstatedir)/lib/glusterd $(sysconfdir)/glusterd) || true;
+endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 6385ac678..1804dd02e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -31,6 +21,7 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "run.h"
+#include <sys/signal.h>
/* misc */
@@ -107,12 +98,19 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
case GF_CLUSTER_TYPE_NONE:
if ((volinfo->brick_count * stripe_count) == total_bricks) {
/* Change the volume type */
+ *type = GF_CLUSTER_TYPE_STRIPE;
gf_log (THIS->name, GF_LOG_INFO,
"Changing the type of volume %s from "
- "None to 'stripe'", volinfo->volname);
- *type = GF_CLUSTER_TYPE_STRIPE;
+ "'distribute' to 'stripe'", volinfo->volname);
ret = 0;
goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for stripe count (%d).",
+ (total_bricks - volinfo->brick_count),
+ stripe_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
break;
case GF_CLUSTER_TYPE_REPLICATE:
@@ -125,16 +123,25 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
volinfo->volname);
ret = 0;
goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "stripe count to %d, need at least %d bricks",
+ (total_bricks - volinfo->brick_count),
+ stripe_count,
+ (volinfo->replica_count * stripe_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
break;
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
if (stripe_count < volinfo->stripe_count) {
- snprintf (err_str, sizeof (err_str),
- "wrong stripe count (%d) given. "
- "already have %d",
+ snprintf (err_str, err_len,
+ "Incorrect stripe count (%d) supplied. "
+ "Volume already has stripe count (%d)",
stripe_count, volinfo->stripe_count);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
if (stripe_count == volinfo->stripe_count) {
@@ -170,7 +177,8 @@ out:
static int
gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
- int total_bricks, int *type, char *err_str, int err_len)
+ int total_bricks, int *type, char *err_str,
+ int err_len)
{
int ret = -1;
@@ -179,12 +187,20 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
case GF_CLUSTER_TYPE_NONE:
if ((volinfo->brick_count * replica_count) == total_bricks) {
/* Change the volume type */
+ *type = GF_CLUSTER_TYPE_REPLICATE;
gf_log (THIS->name, GF_LOG_INFO,
"Changing the type of volume %s from "
- "None to 'replica'", volinfo->volname);
- *type = GF_CLUSTER_TYPE_REPLICATE;
+ "'distribute' to 'replica'", volinfo->volname);
ret = 0;
goto out;
+
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for replica count (%d).",
+ (total_bricks - volinfo->brick_count),
+ replica_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
break;
case GF_CLUSTER_TYPE_STRIPE:
@@ -197,16 +213,26 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
volinfo->volname);
ret = 0;
goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "replica count to %d, need at least %d "
+ "bricks",
+ (total_bricks - volinfo->brick_count),
+ replica_count, (volinfo->dist_leaf_count *
+ replica_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
break;
case GF_CLUSTER_TYPE_REPLICATE:
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
if (replica_count < volinfo->replica_count) {
- snprintf (err_str, sizeof (err_str),
- "wrong replica count (%d) given. "
- "already have %d",
+ snprintf (err_str, err_len,
+ "Incorrect replica count (%d) supplied. "
+ "Volume already has (%d)",
replica_count, volinfo->replica_count);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
if (replica_count == volinfo->replica_count) {
@@ -239,8 +265,10 @@ out:
}
static int
-gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int32_t replica_count,
- int32_t brick_count, char *err_str)
+gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
+ int32_t replica_count,
+ int32_t brick_count, char *err_str,
+ size_t err_len)
{
int ret = -1;
int replica_nodes = 0;
@@ -248,7 +276,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int32_t replica_cou
switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
- snprintf (err_str, 2048,
+ snprintf (err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s", replica_count, volinfo->volname);
gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
@@ -258,7 +286,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int32_t replica_cou
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
/* in remove brick, you can only reduce the replica count */
if (replica_count > volinfo->replica_count) {
- snprintf (err_str, 2048,
+ snprintf (err_str, err_len,
"given replica count (%d) option is more "
"than volume %s's replica count (%d)",
replica_count, volinfo->volname,
@@ -267,15 +295,30 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int32_t replica_cou
goto out;
}
if (replica_count == volinfo->replica_count) {
+ /* This means the 'replica N' option on CLI was
+ redundant. Check if the total number of bricks given
+ for removal is same as 'dist_leaf_count' */
+ if (brick_count % volinfo->dist_leaf_count) {
+ snprintf (err_str, err_len,
+ "number of bricks provided (%d) is "
+ "not valid. need at least %d "
+ "(or %dxN)", brick_count,
+ volinfo->dist_leaf_count,
+ volinfo->dist_leaf_count);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s",
+ err_str);
+ goto out;
+ }
ret = 1;
goto out;
}
- replica_nodes = ((volinfo->brick_count / volinfo->replica_count) *
+ replica_nodes = ((volinfo->brick_count /
+ volinfo->replica_count) *
(volinfo->replica_count - replica_count));
if (brick_count % replica_nodes) {
- snprintf (err_str, 2048,
+ snprintf (err_str, err_len,
"need %d(xN) bricks for reducing replica "
"count of the volume from %d to %d",
replica_nodes, volinfo->replica_count,
@@ -290,35 +333,24 @@ out:
return ret;
}
-
-
/* Handler functions */
int
-glusterd_handle_add_brick (rpcsvc_request_t *req)
+__glusterd_handle_add_brick (rpcsvc_request_t *req)
{
int32_t ret = -1;
gf_cli_req cli_req = {{0,}};
dict_t *dict = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- char *brick = NULL;
char *bricks = NULL;
char *volname = NULL;
int brick_count = 0;
- char *tmpptr = NULL;
- int i = 0;
- char *brick_list = NULL;
void *cli_rsp = NULL;
char err_str[2048] = {0,};
gf_cli_rsp rsp = {0,};
glusterd_volinfo_t *volinfo = NULL;
xlator_t *this = NULL;
- char *free_ptr = NULL;
- glusterd_brickinfo_t *tmpbrkinfo = NULL;
- glusterd_volinfo_t tmpvolinfo = {{0},};
int total_bricks = 0;
int32_t replica_count = 0;
int32_t stripe_count = 0;
- int count = 0;
int type = 0;
this = THIS;
@@ -326,17 +358,16 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
GF_ASSERT (req);
- INIT_LIST_HEAD (&tmpvolinfo.bricks);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf (err_str, sizeof (err_str), "Garbage args received");
goto out;
}
- gf_log ("glusterd", GF_LOG_INFO, "Received add brick req");
+ gf_log (this->name, GF_LOG_INFO, "Received add brick req");
if (cli_req.dict.dict_len) {
/* Unserialize the dictionary */
@@ -346,74 +377,66 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
cli_req.dict.dict_len,
&dict);
if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to "
"unserialize req-buffer to dictionary");
snprintf (err_str, sizeof (err_str), "Unable to decode "
- "the buffer");
+ "the command");
goto out;
}
}
ret = dict_get_str (dict, "volname", &volname);
- gf_cmd_log ("Volume add-brick", "on volname: %s attempted",
- volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
snprintf (err_str, sizeof (err_str), "Unable to get volume "
"name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
if (!(ret = glusterd_check_volume_exists (volname))) {
ret = -1;
- snprintf(err_str, 2048, "Volume %s does not exist", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ snprintf (err_str, sizeof (err_str), "Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
ret = dict_get_int32 (dict, "count", &brick_count);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
snprintf (err_str, sizeof (err_str), "Unable to get volume "
"brick count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO, "replica-count is %d",
+ gf_log (this->name, GF_LOG_INFO, "replica-count is %d",
replica_count);
}
ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO, "stripe-count is %d",
+ gf_log (this->name, GF_LOG_INFO, "stripe-count is %d",
stripe_count);
}
+ if (!dict_get (dict, "force")) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get flag");
+ goto out;
+ }
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
"for volume name %s", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_NONE)
- goto brick_val;
-
- /* If any of this is true, some thing is wrong */
- if (!brick_count || !volinfo->sub_count) {
- ret = -1;
- snprintf (err_str, sizeof (err_str), "number of brick count "
- "for volume name %s is wrong", volname);
- goto out;
- }
-
total_bricks = volinfo->brick_count + brick_count;
if (!stripe_count && !replica_count) {
@@ -425,10 +448,10 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
goto brick_val;
if ((brick_count % volinfo->dist_leaf_count) != 0) {
- snprintf(err_str, 2048, "Incorrect number of bricks"
- " supplied %d with count %d",
+ snprintf (err_str, sizeof (err_str), "Incorrect number "
+ "of bricks supplied %d with count %d",
brick_count, volinfo->dist_leaf_count);
- gf_log("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
ret = -1;
goto out;
}
@@ -444,7 +467,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
err_str,
sizeof (err_str));
if (ret == -1) {
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
@@ -452,6 +475,12 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
if (ret == 1)
stripe_count = 0;
+ ret = dict_set_int32 (dict, "stripe-count", stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the stripe-count in dict");
+ goto out;
+ }
goto brick_val;
}
@@ -460,7 +489,7 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
&type, err_str,
sizeof (err_str));
if (ret == -1) {
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
@@ -468,124 +497,135 @@ glusterd_handle_add_brick (rpcsvc_request_t *req)
if (ret == 1)
replica_count = 0;
+ ret = dict_set_int32 (dict, "replica-count", replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the replica-count in dict");
+ goto out;
+ }
+
brick_val:
ret = dict_get_str (dict, "bricks", &bricks);
if (ret) {
snprintf (err_str, sizeof (err_str), "Unable to get volume "
"bricks");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
-
- if (bricks)
- brick_list = gf_strdup (bricks);
- if (!brick_list) {
- ret = -1;
- snprintf (err_str, sizeof (err_str), "Out of memory");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
- } else {
- free_ptr = brick_list;
}
- gf_cmd_log ("Volume add-brick", "volname: %s type %d count:%d bricks:%s"
- ,volname, volinfo->type, brick_count, brick_list);
-
- count = 0;
- while ( i < brick_count) {
- i++;
- brick= strtok_r (brick_list, " \n", &tmpptr);
- brick_list = tmpptr;
- brickinfo = NULL;
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret) {
- snprintf (err_str, sizeof (err_str), "Unable to get "
- "brick info from brick %s", brick);
- goto out;
- }
- ret = glusterd_new_brick_validate (brick, brickinfo, err_str,
- sizeof (err_str));
- if (ret)
- goto out;
- ret = glusterd_volume_brickinfo_get (brickinfo->uuid,
- brickinfo->hostname,
- brickinfo->path,
- &tmpvolinfo, &tmpbrkinfo,
- GF_PATH_PARTIAL);
- if (!ret) {
- ret = -1;
- snprintf (err_str, sizeof (err_str), "Brick: %s:%s, %s"
- " one of the bricks contain the other",
- tmpbrkinfo->hostname, tmpbrkinfo->path, brick);
- goto out;
- }
-
- if (stripe_count || replica_count)
- add_brick_at_right_order (brickinfo, &tmpvolinfo, count,
- stripe_count, replica_count);
- else
- list_add_tail (&brickinfo->brick_list, &tmpvolinfo.bricks);
-
- count++;
- brickinfo = NULL;
- }
-
- if (stripe_count) {
- dict_del (dict, "stripe-count");
- ret = dict_set_int32 (dict, "stripe-count", stripe_count);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set the stripe-count in dict");
- }
- if (replica_count) {
- dict_del (dict, "replica-count");
- ret = dict_set_int32 (dict, "replica-count", replica_count);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set the replica-count in dict");
- }
if (type != volinfo->type) {
ret = dict_set_int32 (dict, "type", type);
if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to set the new type in dict");
}
- ret = glusterd_op_begin (req, GD_OP_ADD_BRICK, dict);
- gf_cmd_log ("Volume add-brick","on volname: %s %s", volname,
- (ret != 0)? "FAILED" : "SUCCESS");
+ ret = glusterd_op_begin_synctask (req, GD_OP_ADD_BRICK, dict);
out:
if (ret) {
- if (dict)
- dict_unref (dict);
rsp.op_ret = -1;
rsp.op_errno = 0;
if (err_str[0] == '\0')
snprintf (err_str, sizeof (err_str), "Operation failed");
rsp.op_errstr = err_str;
cli_rsp = &rsp;
- glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
ret = 0; //sent error to cli, prevent second reply
}
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ free (cli_req.dict.dict_val); //its malloced by xdr
- if (free_ptr)
- GF_FREE (free_ptr);
- glusterd_volume_brickinfos_delete (&tmpvolinfo);
- if (brickinfo)
- glusterd_brickinfo_delete (brickinfo);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val); //its malloced by xdr
+ return ret;
+}
+
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+
+static int
+subvol_matcher_init (int **subvols, int count)
+{
+ int ret = -1;
+
+ *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int);
+ if (*subvols)
+ ret = 0;
+
+ return ret;
+}
+
+static void
+subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ glusterd_brickinfo_t *tmp = NULL;
+ int32_t sub_volume = 0;
+ int pos = 0;
+
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+
+ if (strcmp (tmp->hostname, brickinfo->hostname) ||
+ strcmp (tmp->path, brickinfo->path)) {
+ pos++;
+ continue;
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK,
+ brickinfo->hostname, brickinfo->path,
+ volinfo->volname);
+ sub_volume = (pos / volinfo->dist_leaf_count);
+ subvols[sub_volume]++;
+ break;
+ }
+
+}
+
+static int
+subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
+ size_t err_len, char *vol_type, int replica_count)
+{
+ int i = 0;
+ int ret = 0;
+ int count = volinfo->replica_count-replica_count;
+
+ if (replica_count) {
+ for (i = 0; i < volinfo->subvol_count; i++) {
+ if (subvols[i] != count) {
+ ret = -1;
+ snprintf (err_str, err_len, "Remove exactly %d"
+ " brick(s) from each subvolume.", count);
+ break;
+ }
+ }
+ return ret;
+ }
+
+ do {
+
+ if (subvols[i] % volinfo->dist_leaf_count == 0) {
+ continue;
+ } else {
+ ret = -1;
+ snprintf (err_str, err_len,
+ "Bricks not from same subvol for %s", vol_type);
+ break;
+ }
+ } while (++i < volinfo->subvol_count);
return ret;
}
+static void
+subvol_matcher_destroy (int *subvols)
+{
+ GF_FREE (subvols);
+}
int
-glusterd_handle_remove_brick (rpcsvc_request_t *req)
+__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
int32_t ret = -1;
gf_cli_req cli_req = {{0,}};
@@ -597,33 +637,30 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
int i = 1;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
- int32_t pos = 0;
- int32_t sub_volume = 0;
- int32_t sub_volume_start = 0;
- int32_t sub_volume_end = 0;
- glusterd_brickinfo_t *tmp = NULL;
+ int *subvols = NULL;
char err_str[2048] = {0};
gf_cli_rsp rsp = {0,};
void *cli_rsp = NULL;
char vol_type[256] = {0,};
int32_t replica_count = 0;
- int32_t brick_index = 0;
- int32_t tmp_brick_idx = 0;
- int found = 0;
- int diff_count = 0;
char *volname = 0;
+ xlator_t *this = NULL;
GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Received garbage args");
goto out;
}
- gf_log ("glusterd", GF_LOG_INFO, "Received rem brick req");
+ gf_log (this->name, GF_LOG_INFO, "Received rem brick req");
if (cli_req.dict.dict_len) {
/* Unserialize the dictionary */
@@ -633,52 +670,61 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
cli_req.dict.dict_len,
&dict);
if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to "
"unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
goto out;
}
}
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to get volname");
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
- gf_cmd_log ("Volume remove-brick","on volname: %s attempted", volname);
ret = dict_get_int32 (dict, "count", &count);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
+ snprintf (err_str, sizeof (err_str), "Unable to get brick "
+ "count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- snprintf (err_str, 2048, "Volume %s does not exist",
- volname);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
+ snprintf (err_str, sizeof (err_str),"Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_INFO,
"request to change replica-count to %d", replica_count);
ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
- count, err_str);
+ count, err_str,
+ sizeof (err_str));
if (ret < 0) {
- /* logging and error msg are done in above function itself */
+ /* logging and error msg are done in above function
+ itself */
goto out;
}
dict_del (dict, "replica-count");
if (ret) {
replica_count = 0;
} else {
- ret = dict_set_int32 (dict, "replica-count", replica_count);
+ ret = dict_set_int32 (dict, "replica-count",
+ replica_count);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set the replica_count in dict");
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the replica_count "
+ "in dict");
goto out;
}
}
@@ -695,25 +741,49 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "distribute");
}
- /* Do not allow remove-brick if the volume is plain stripe */
+ /* Do not allow remove-brick if the volume is a stripe volume*/
if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
(volinfo->brick_count == volinfo->stripe_count)) {
- snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+ snprintf (err_str, sizeof (err_str),
+ "Removing brick from a stripe volume is not allowed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
ret = -1;
goto out;
}
- /* Do not allow remove-brick if the bricks given is less than the replica count
- or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- !(volinfo->brick_count <= volinfo->dist_leaf_count)) {
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof(err_str),
+ "Removing bricks from stripe-replicate"
+ " configuration is not allowed without reducing "
+ "replica or stripe count explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof (err_str),
+ "Removing bricks from replicate configuration "
+ "is not allowed without reducing replica count "
+ "explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* Do not allow remove-brick if the bricks given is less than
+ the replica count or stripe count */
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
if (volinfo->dist_leaf_count &&
(count % volinfo->dist_leaf_count)) {
- snprintf (err_str, 2048, "Remove brick incorrect"
- " brick count of %d for %s %d",
+ snprintf (err_str, sizeof (err_str), "Remove brick "
+ "incorrect brick count of %d for %s %d",
count, vol_type, volinfo->dist_leaf_count);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
ret = -1;
goto out;
}
@@ -727,23 +797,32 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
}
strcpy (brick_list, " ");
+
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_init (&subvols, volinfo->subvol_count);
+ if (ret)
+ goto out;
+ }
+
while ( i <= count) {
- snprintf (key, 256, "brick%d", i);
+ snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
+ snprintf (err_str, sizeof (err_str), "Unable to get %s",
+ key);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
- gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s",
- i, brick);
+ gf_log (this->name, GF_LOG_DEBUG, "Remove brick count %d brick:"
+ " %s", i, brick);
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
+ &brickinfo);
if (ret) {
- snprintf(err_str, 2048,"Incorrect brick %s for volume"
- " %s", brick, volname);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
+ snprintf (err_str, sizeof (err_str), "Incorrect brick "
+ "%s for volume %s", brick, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
strcat(brick_list, brick);
@@ -754,110 +833,146 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
(volinfo->brick_count <= volinfo->dist_leaf_count))
continue;
- if (replica_count) {
- /* do the validation of bricks here */
- /* -2 because i++ is already done, and i starts with 1,
- instead of 0 */
- diff_count = (volinfo->replica_count - replica_count);
- brick_index = (((i -2) / diff_count) * volinfo->replica_count);
- tmp_brick_idx = 0;
- found = 0;
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
- tmp_brick_idx++;
- gf_log (THIS->name, GF_LOG_TRACE,
- "validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- if (tmp_brick_idx <= brick_index)
- continue;
- if (tmp_brick_idx >
- (brick_index + volinfo->replica_count))
- break;
- if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
- !strcmp (tmp->path, brickinfo->path)) {
- found = 1;
- break;
- }
- }
- if (found)
- continue;
+ /* Find which subvolume the brick belongs to */
+ subvol_matcher_update (subvols, volinfo, brickinfo);
+ }
- snprintf(err_str, 2048,"Bricks are from same subvol");
- gf_log (THIS->name, GF_LOG_INFO,
- "failed to validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- ret = -1;
- /* brick order is not valid */
+ /* Check if the bricks belong to the same subvolumes.*/
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_verify (subvols, volinfo,
+ err_str, sizeof(err_str),
+ vol_type, replica_count);
+ if (ret)
goto out;
- }
-
- pos = 0;
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
-
- if (strcmp (tmp->hostname,brickinfo->hostname) ||
- strcmp (tmp->path, brickinfo->path)) {
- pos++;
- continue;
- }
-
- gf_log ("", GF_LOG_INFO, "Found brick");
- if (!sub_volume && (volinfo->dist_leaf_count > 1)) {
- sub_volume = (pos / volinfo->dist_leaf_count) + 1;
- sub_volume_start = (volinfo->dist_leaf_count *
- (sub_volume - 1));
- sub_volume_end = (volinfo->dist_leaf_count *
- sub_volume) - 1;
- } else {
- if (pos < sub_volume_start ||
- pos >sub_volume_end) {
- ret = -1;
- snprintf(err_str, 2048,"Bricks not from"
- " same subvol for %s",
- vol_type);
- gf_log ("", GF_LOG_ERROR,
- "%s", err_str);
- goto out;
- }
- }
- break;
- }
}
- gf_cmd_log ("Volume remove-brick","volname: %s count:%d bricks:%s",
- volname, count, brick_list);
- ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict);
- gf_cmd_log ("Volume remove-brick","on volname: %s %s", volname,
- (ret) ? "FAILED" : "SUCCESS");
+ ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict);
out:
if (ret) {
- if (dict)
- dict_unref (dict);
rsp.op_ret = -1;
rsp.op_errno = 0;
if (err_str[0] == '\0')
- snprintf (err_str, sizeof (err_str), "Operation failed");
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
rsp.op_errstr = err_str;
cli_rsp = &rsp;
- glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
ret = 0; //sent error to cli, prevent second reply
}
- if (brick_list)
- GF_FREE (brick_list);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val); //its malloced by xdr
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ GF_FREE (brick_list);
+ subvol_matcher_destroy (subvols);
+ free (cli_req.dict.dict_val); //its malloced by xdr
return ret;
}
+int
+glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_remove_brick);
+}
+
+static int
+_glusterd_restart_gsync_session (dict_t *this, char *key,
+ data_t *value, void *data)
+{
+ char *slave = NULL;
+ char *slave_buf = NULL;
+ char *path_list = NULL;
+ char *slave_vol = NULL;
+ char *slave_ip = NULL;
+ char *conf_path = NULL;
+ char **errmsg = NULL;
+ int ret = -1;
+ glusterd_gsync_status_temp_t *param = NULL;
+ gf_boolean_t is_running = _gf_false;
+
+ param = (glusterd_gsync_status_temp_t *)data;
+
+ GF_ASSERT (param);
+ GF_ASSERT (param->volinfo);
+
+ slave = strchr(value->data, ':');
+ if (slave) {
+ slave++;
+ slave_buf = gf_strdup (slave);
+ if (!slave_buf) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
+ }
+ }
+ else
+ return 0;
+
+ ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store slave");
+ if (slave_buf)
+ GF_FREE(slave_buf);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath (param->volinfo,
+ param->rsp_dict,
+ &slave_ip, &slave_vol,
+ &conf_path, errmsg);
+ if (ret) {
+ if (*errmsg)
+ gf_log ("", GF_LOG_ERROR, "%s", *errmsg);
+ else
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ goto out;
+ }
+
+ /* In cases that gsyncd is not running, we will not invoke it
+ * because of add-brick. */
+ ret = glusterd_check_gsync_running_local (param->volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "gsync running validation failed.");
+ goto out;
+ }
+ if (_gf_false == is_running) {
+ gf_log ("", GF_LOG_DEBUG, "gsync session for %s and %s is"
+ " not running on this node. Hence not restarting.",
+ param->volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_local_brickpaths (param->volinfo, &path_list);
+ if (!path_list) {
+ gf_log ("", GF_LOG_DEBUG, "This node not being part of"
+ " volume should not be running gsyncd. Hence"
+ " no gsyncd process to restart.");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_check_restart_gsync_session (param->volinfo, slave,
+ param->rsp_dict, path_list,
+ conf_path, 0);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to restart gsync session.");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
/* op-sm */
@@ -865,17 +980,22 @@ int
glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
char *bricks, dict_t *dict)
{
- glusterd_brickinfo_t *brickinfo = NULL;
- char *brick = NULL;
- int32_t i = 1;
- char *brick_list = NULL;
- char *free_ptr1 = NULL;
- char *free_ptr2 = NULL;
- char *saveptr = NULL;
- int32_t ret = -1;
- int32_t stripe_count = 0;
- int32_t replica_count = 0;
- int32_t type = 0;
+ char *brick = NULL;
+ int32_t i = 1;
+ char *brick_list = NULL;
+ char *free_ptr1 = NULL;
+ char *free_ptr2 = NULL;
+ char *saveptr = NULL;
+ int32_t ret = -1;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t type = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_gsync_status_temp_t param = {0, };
+ gf_boolean_t restart_needed = 0;
+ char msg[1024] __attribute__((unused)) = {0, };
+ int caps = 0;
+ int brickid = 0;
GF_ASSERT (volinfo);
@@ -903,11 +1023,17 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
"type is set %d, need to change it", type);
}
+ brickid = glusterd_get_next_available_brickid (volinfo);
+ if (brickid < 0)
+ goto out;
while ( i <= count) {
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
if (ret)
goto out;
+ GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO (brickinfo, volinfo,
+ brickid++);
+
ret = glusterd_resolve_brick (brickinfo);
if (ret)
goto out;
@@ -933,46 +1059,94 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
if (stripe_count) {
volinfo->stripe_count = stripe_count;
}
- volinfo->dist_leaf_count = (volinfo->stripe_count *
- volinfo->replica_count);
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
/* backward compatibility */
volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0:
volinfo->dist_leaf_count);
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ goto out;
+
+ ret = 0;
+ if (GLUSTERD_STATUS_STARTED != volinfo->status)
+ goto out;
+
brick_list = gf_strdup (bricks);
free_ptr2 = brick_list;
i = 1;
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
-
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret)
- goto out;
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0])
+ caps = CAPS_BD | CAPS_THIN |
+ CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+#endif
while (i <= count) {
-
ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_PARTIAL);
+ &brickinfo);
if (ret)
goto out;
+#ifdef HAVE_BD_XLATOR
+ /* Check for VG/thin pool if its BD volume */
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg);
+ goto out;
+ }
+ /* if anyone of the brick does not have thin support,
+ disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else
+ caps = 0;
+#endif
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_start (volinfo, brickinfo);
- if (ret)
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
goto out;
+ }
}
+
+ ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_true);
+ if (ret)
+ goto out;
i++;
brick = strtok_r (NULL, " \n", &saveptr);
+
+ /* Check if the brick is added in this node, and set
+ * the restart_needed flag. */
+ if ((!uuid_compare (brickinfo->uuid, MY_UUID)) &&
+ !restart_needed) {
+ restart_needed = 1;
+ gf_log ("", GF_LOG_DEBUG,
+ "Restart gsyncd session, if it's already "
+ "running.");
+ }
}
+ /* If the restart_needed flag is set, restart gsyncd sessions for that
+ * particular master with all the slaves. */
+ if (restart_needed) {
+ param.rsp_dict = dict;
+ param.volinfo = volinfo;
+ dict_foreach (volinfo->gsync_slaves,
+ _glusterd_restart_gsync_session, &param);
+ }
+ volinfo->caps = caps;
out:
- if (free_ptr1)
- GF_FREE (free_ptr1);
- if (free_ptr2)
- GF_FREE (free_ptr2);
+ GF_FREE (free_ptr1);
+ GF_FREE (free_ptr2);
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -984,7 +1158,6 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
int force, int *need_migrate)
{
glusterd_brickinfo_t *brickinfo = NULL;
- char *dup_brick = NULL;
int32_t ret = -1;
glusterd_conf_t *priv = NULL;
@@ -994,12 +1167,8 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
priv = THIS->private;
GF_ASSERT (priv);
- dup_brick = gf_strdup (brick);
- if (!dup_brick)
- goto out;
-
- ret = glusterd_volume_brickinfo_get_by_brick (dup_brick, volinfo,
- &brickinfo, GF_PATH_COMPLETE);
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
if (ret)
goto out;
@@ -1007,30 +1176,27 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
if (ret)
goto out;
- if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
+ glusterd_volinfo_reset_defrag_stats (volinfo);
+
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
/* Only if the brick is in this glusterd, do the rebalance */
if (need_migrate)
*need_migrate = 1;
}
if (force) {
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_stop (volinfo, brickinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
- "glusterfs, ret: %d", ret);
- goto out;
- }
+ ret = glusterd_brick_stop (volinfo, brickinfo,
+ _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
+ "glusterfs, ret: %d", ret);
}
- glusterd_delete_brick (volinfo, brickinfo);
goto out;
}
brickinfo->decommissioned = 1;
+ ret = 0;
out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@@ -1041,6 +1207,7 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
int ret = 0;
char *volname = NULL;
int count = 0;
+ int replica_count = 0;
int i = 0;
char *bricks = NULL;
char *brick_list = NULL;
@@ -1049,33 +1216,63 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
char *brick = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_volinfo_t *volinfo = NULL;
- char cmd_str[1024];
- glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
char msg[2048] = {0,};
gf_boolean_t brick_alloc = _gf_false;
char *all_bricks = NULL;
char *str_ret = NULL;
+ gf_boolean_t is_force = _gf_false;
- priv = THIS->private;
- if (!priv)
- goto out;
+ this = THIS;
+ GF_ASSERT (this);
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to get replica count");
+ }
+
+ if (replica_count > 0) {
+ ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
+ msg, sizeof(msg));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+ }
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to find volume: %s", volname);
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+ "volume %s. Please retry after replace-brick "
+ "operation is committed or aborted", volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
goto out;
}
if (glusterd_is_defrag_on(volinfo)) {
snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
"progress. Please retry after completion", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
*op_errstr = gf_strdup (msg);
ret = -1;
goto out;
@@ -1088,38 +1285,28 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
ret = dict_get_str (dict, "bricks", &bricks);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get bricks");
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to get bricks");
goto out;
}
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
if (bricks) {
brick_list = gf_strdup (bricks);
all_bricks = gf_strdup (bricks);
free_ptr = brick_list;
}
- /* Check whether any of the bricks given is the destination brick of the
- replace brick running */
-
- str_ret = glusterd_check_brick_rb_part (all_bricks, count, volinfo);
- if (str_ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "%s", str_ret);
- *op_errstr = gf_strdup (str_ret);
- ret = -1;
- goto out;
- }
-
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
while ( i < count) {
if (!glusterd_store_is_valid_brickpath (volname, brick) ||
- !glusterd_is_valid_volfpath (volname, brick)) {
- snprintf (msg, sizeof (msg), "brick path %s is too "
- "long.", brick);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
+ !glusterd_is_valid_volfpath (volname, brick)) {
+ snprintf (msg, sizeof (msg), "brick path %s is "
+ "too long", brick);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
*op_errstr = gf_strdup (msg);
ret = -1;
@@ -1127,37 +1314,39 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
}
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_PARTIAL);
- if (!ret) {
- gf_log ("", GF_LOG_ERROR, "Adding duplicate brick: %s",
- brick);
- ret = -1;
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Add-brick: Unable"
+ " to get brickinfo");
goto out;
- } else {
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Add-brick: Unable"
- " to get brickinfo");
- goto out;
- }
- brick_alloc = _gf_true;
}
+ brick_alloc = _gf_true;
- snprintf (cmd_str, 1024, "%s", brickinfo->path);
- ret = glusterd_resolve_brick (brickinfo);
+ ret = glusterd_new_brick_validate (brick, brickinfo, msg,
+ sizeof (msg));
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "resolve brick failed");
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
goto out;
}
- if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
- ret = glusterd_brick_create_path (brickinfo->hostname,
- brickinfo->path,
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 1, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s",
+ msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+ }
+#endif
+
+ ret = glusterd_validate_and_create_brickpath (brickinfo,
volinfo->volume_id,
- 0777, op_errstr);
+ op_errstr, is_force);
if (ret)
goto out;
}
@@ -1170,16 +1359,13 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
}
out:
- if (free_ptr)
- GF_FREE (free_ptr);
+ GF_FREE (free_ptr);
if (brick_alloc && brickinfo)
glusterd_brickinfo_delete (brickinfo);
- if (str_ret)
- GF_FREE (str_ret);
- if (all_bricks)
- GF_FREE (all_bricks);
+ GF_FREE (str_ret);
+ GF_FREE (all_bricks);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@@ -1187,35 +1373,81 @@ out:
int
glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *errstr = NULL;
- int32_t brick_count = 0;
- char msg[2048] = {0,};
- int32_t flag = 0;
- gf1_op_commands cmd = GF_OP_CMD_NONE;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+ int32_t brick_count = 0;
+ char msg[2048] = {0,};
+ int32_t flag = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ int i = 1;
+ char key[256] = {0,};
+ char *brick = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
+ msg, sizeof(msg));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+ "volume %s. Please retry after replace-brick "
+ "operation is committed or aborted", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
goto out;
}
ret = dict_get_int32 (dict, "command", &flag);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get brick command");
goto out;
}
cmd = flag;
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+
+ ret = 0;
+ if (volinfo->brick_count == brick_count) {
+ errstr = gf_strdup ("Deleting all the bricks of the "
+ "volume is not allowed");
+ ret = -1;
+ goto out;
+ }
+
ret = -1;
switch (cmd) {
case GF_OP_CMD_NONE:
@@ -1228,63 +1460,114 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_START:
{
+ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ dict_get (dict, "replica-count")) {
+ snprintf (msg, sizeof(msg), "Migration of data is not "
+ "needed when reducing replica count. Use the"
+ " 'force' option");
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started "
- "before remove-brick (you can use 'force' or "
- "'commit' to override this behavior)",
+ snprintf (msg, sizeof (msg), "Volume %s needs to be "
+ "started before remove-brick (you can use "
+ "'force' or 'commit' to override this "
+ "behavior)", volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ if (!gd_is_remove_brick_committed (volinfo)) {
+ snprintf (msg, sizeof (msg), "An earlier remove-brick "
+ "task exists for volume %s. Either commit it"
+ " or stop it before starting a new task.",
volinfo->volname);
errstr = gf_strdup (msg);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
+ gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
+ " task exists for volume %s.",
+ volinfo->volname);
goto out;
}
if (glusterd_is_defrag_on(volinfo)) {
- errstr = gf_strdup("Rebalance is in progress. Please retry"
- " after completion");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
+ errstr = gf_strdup("Rebalance is in progress. Please "
+ "retry after completion");
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
- break;
- }
- case GF_OP_CMD_PAUSE:
- case GF_OP_CMD_ABORT:
- {
- if (!volinfo->decommission_in_progress) {
- errstr = gf_strdup("remove-brick is not in progress");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
- goto out;
+ if (is_origin_glusterd (dict)) {
+ ret = glusterd_generate_and_set_task_id
+ (dict, GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Missing remove-brick-id");
+ ret = 0;
+ }
}
break;
}
+ case GF_OP_CMD_STOP:
+ ret = 0;
+ break;
+
case GF_OP_CMD_COMMIT:
if (volinfo->decommission_in_progress) {
errstr = gf_strdup ("use 'force' option as migration "
"is in progress");
goto out;
}
+
+ /* Do not allow commit if the bricks are not decommissioned */
+ for ( i = 1; i <= brick_count; i++ ) {
+ snprintf (key, sizeof (key), "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ snprintf (msg, sizeof (msg),
+ "Unable to get %s", key);
+ errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret =
+ glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+ &brickinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Incorrect brick "
+ "%s for volume %s", brick, volname);
+ errstr = gf_strdup (msg);
+ goto out;
+ }
+ if ( !brickinfo->decommissioned ) {
+ snprintf (msg, sizeof (msg), "Brick %s "
+ "is not decommissioned. "
+ "Use start or force option",
+ brick);
+ errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ }
+
break;
case GF_OP_CMD_COMMIT_FORCE:
break;
}
-
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
- goto out;
- }
-
ret = 0;
- if (volinfo->brick_count == brick_count) {
- errstr = gf_strdup ("Deleting all the bricks of the "
- "volume is not allowed");
- ret = -1;
- goto out;
- }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
if (ret && errstr) {
if (op_errstr)
*op_errstr = errstr;
@@ -1329,6 +1612,7 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
brickinfo->path);
brickinfo->decommissioned = 0;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ /*TODO: use the 'atomic' flavour of brick_stop*/
ret = glusterd_brick_stop (volinfo, brickinfo);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR,
@@ -1419,17 +1703,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
goto out;
}
- /* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
- case GF_DEFRAG_STATUS_FAILED:
- case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
- volinfo->defrag_status = 0;
- default:
- break;
- }
-
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret)
goto out;
@@ -1444,42 +1717,74 @@ out:
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
- int32_t flag = 0;
- char err_str[4096] = {0,};
- int need_rebalance = 0;
- int force = 0;
- gf1_op_commands cmd = 0;
- int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t flag = 0;
+ char err_str[4096] = {0,};
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
+ int32_t replica_count = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ dict_t *bricks_dict = NULL;
+ char *brick_tmpstr = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find (volname, &volinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
goto out;
}
ret = dict_get_int32 (dict, "command", &flag);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get command");
goto out;
}
cmd = flag;
+ /* Set task-id, if available, in ctx dict for operations other than
+ * start
+ */
+ if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) {
+ if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, dict,
+ GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
+ }
+
+ /* Clear task-id, rebal.op and stored bricks on commmitting/stopping
+ * remove-brick */
+ if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
+ uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+ dict_unref (volinfo->rebal.dict);
+ volinfo->rebal.dict = NULL;
+ }
+
ret = -1;
switch (cmd) {
case GF_OP_CMD_NONE:
@@ -1489,46 +1794,49 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
ret = 0;
goto out;
- case GF_OP_CMD_PAUSE:
+ case GF_OP_CMD_STOP:
{
- if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
-
- volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
-
- UNLOCK (&volinfo->defrag->lock);
- }
- }
-
- /* no need to update anything */
- ret = 0;
- goto out;
- }
-
- case GF_OP_CMD_ABORT:
- {
- if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
-
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
-
- UNLOCK (&volinfo->defrag->lock);
- }
- }
-
/* Fall back to the old volume file */
- list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
if (!brickinfo->decommissioned)
continue;
brickinfo->decommissioned = 0;
}
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to store volinfo");
+ goto out;
+ }
+
ret = 0;
- break;
+ goto out;
}
case GF_OP_CMD_START:
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Missing remove-brick-id");
+ ret = 0;
+ } else {
+ uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+ volinfo->rebal.op = GD_OP_REMOVE_BRICK;
+ }
force = 0;
break;
@@ -1539,13 +1847,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
+ if (volinfo->rebal.defrag) {
+ LOCK (&volinfo->rebal.defrag->lock);
/* Fake 'rebalance-complete' so the graph change
happens right away */
- volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_COMPLETE;
- UNLOCK (&volinfo->defrag->lock);
+ UNLOCK (&volinfo->rebal.defrag->lock);
}
/* Graph change happens in rebalance _cbk function,
no need to do anything here */
@@ -1563,30 +1872,70 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
-
+ /* Save the list of bricks for later usage only on starting a
+ * remove-brick. Right now this is required for displaying the task
+ * parameters with task status in volume status.
+ */
+ if (GF_OP_CMD_START == cmd) {
+ bricks_dict = dict_new ();
+ if (!bricks_dict) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (bricks_dict, "count", count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to save remove-brick count");
+ goto out;
+ }
+ }
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
ret = dict_get_str (dict, key, &brick);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get %s",
+ key);
goto out;
}
+ if (GF_OP_CMD_START == cmd) {
+ brick_tmpstr = gf_strdup (brick);
+ if (!brick_tmpstr) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to duplicate brick name");
+ goto out;
+ }
+ ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add brick to dict");
+ goto out;
+ }
+ brick_tmpstr = NULL;
+ }
+
ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
- (i == 1) ? &need_rebalance : NULL);
+ &need_rebalance);
if (ret)
goto out;
i++;
}
+ if (GF_OP_CMD_START == cmd)
+ volinfo->rebal.dict = dict_ref (bricks_dict);
+
ret = dict_get_int32 (dict, "replica-count", &replica_count);
if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_INFO,
"changing replica count %d to %d on volume %s",
volinfo->replica_count, replica_count,
volinfo->volname);
volinfo->replica_count = replica_count;
- volinfo->dist_leaf_count = (volinfo->stripe_count *
- replica_count);
+ volinfo->sub_count = replica_count;
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
if (replica_count == 1) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
@@ -1602,36 +1951,46 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
ret = glusterd_create_volfiles_and_notify_services (volinfo);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
+ gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles");
goto out;
}
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
+ gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo");
+ goto out;
+ }
+
+ if (GF_OP_CMD_START == cmd &&
+ volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to reconfigure NFS-Server");
goto out;
+ }
}
/* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
+ switch (volinfo->rebal.defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
- volinfo->defrag_status = 0;
+ volinfo->rebal.defrag_status = 0;
default:
break;
}
if (!force && need_rebalance) {
/* perform the rebalance operations */
- ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
- GF_DEFRAG_CMD_START_FORCE,
- glusterd_remove_brick_migrate_cbk);
+ ret = glusterd_handle_defrag_start
+ (volinfo, err_str, sizeof (err_str),
+ GF_DEFRAG_CMD_START_FORCE,
+ glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
+
if (!ret)
volinfo->decommission_in_progress = 1;
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"failed to start the rebalance");
}
} else {
@@ -1643,5 +2002,9 @@ out:
if (ret && err_str[0] && op_errstr)
*op_errstr = gf_strdup (err_str);
+ GF_FREE (brick_tmpstr);
+ if (bricks_dict)
+ dict_unref (bricks_dict);
+
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index db43ecb4d..9433a128e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -31,19 +21,202 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "run.h"
+#include "syscall.h"
#include <signal.h>
+static int
+dict_get_param (dict_t *dict, char *key, char **param);
+
+struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
+ {.op_name = "change_detector",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"xsync", "changelog"},
+ },
+ {.op_name = "special_sync_mode",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"partial", "recover"}
+ },
+ {.op_name = "log-level",
+ .no_of_pos_vals = 5,
+ .case_sensitive = _gf_false,
+ .values = {"critical", "error", "warning", "info", "debug"}
+ },
+ {.op_name = "use-tarssh",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}
+ },
+ {.op_name = NULL,
+ },
+};
+
static char *gsync_reserved_opts[] = {
- "gluster-command",
+ "gluster-command-dir",
"pid-file",
+ "remote-gsyncd"
"state-file",
"session-owner",
+ "state-socket-unencoded",
+ "socketdir",
+ "ignore-deletes",
+ "local-id",
+ "local-path",
+ "slave-id",
+ NULL
+};
+
+static char *gsync_no_restart_opts[] = {
+ "checkpoint",
NULL
};
int
-glusterd_handle_gsync_set (rpcsvc_request_t *req)
+__glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_SYS_EXEC;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_COPY_FILE;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_gsync_set (rpcsvc_request_t *req)
{
int32_t ret = 0;
dict_t *dict = NULL;
@@ -55,15 +228,19 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req)
int type = 0;
glusterd_conf_t *priv = NULL;
char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
GF_ASSERT (req);
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
- priv = THIS->private;
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
goto out;
}
@@ -77,17 +254,19 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req)
cli_req.dict.dict_len,
&dict);
if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to "
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
"unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
goto out;
} else {
dict->extra_stdfree = cli_req.dict.dict_val;
}
- host_uuid = gf_strdup (uuid_utoa(priv->uuid));
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
if (host_uuid == NULL) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to get"
- "the uuid of the host machine");
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
ret = -1;
goto out;
}
@@ -99,26 +278,31 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req)
ret = dict_get_str (dict, "master", &master);
if (ret < 0) {
- gf_log ("", GF_LOG_INFO, "master not found, while handling"
- GEOREP" options");
+ gf_log (this->name, GF_LOG_INFO, "master not found, while "
+ "handling "GEOREP" options");
master = "(No Master)";
}
ret = dict_get_str (dict, "slave", &slave);
if (ret < 0) {
- gf_log ("", GF_LOG_INFO, "slave not not found, while"
+ gf_log (this->name, GF_LOG_INFO, "slave not found, while "
"handling "GEOREP" options");
slave = "(No Slave)";
}
ret = dict_get_int32 (dict, "type", &type);
if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "command type not found, while"
- "handling "GEOREP" options");
+ snprintf (err_str, sizeof (err_str), "Command type not found "
+ "while handling "GEOREP" options");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
switch (type) {
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ strncpy (operation, "create", sizeof (operation));
+ cli_op = GD_OP_GSYNC_CREATE;
+ break;
case GF_GSYNC_OPTION_TYPE_START:
strncpy (operation, "start", sizeof (operation));
@@ -135,30 +319,38 @@ glusterd_handle_gsync_set (rpcsvc_request_t *req)
case GF_GSYNC_OPTION_TYPE_STATUS:
strncpy (operation, "status", sizeof (operation));
break;
- case GF_GSYNC_OPTION_TYPE_ROTATE:
- strncpy (operation, "rotate", sizeof(operation));
- break;
}
- gf_cmd_log ("volume "GEOREP, " %s command on %s,%s", operation, master,
- slave);
- ret = glusterd_op_begin (req, GD_OP_GSYNC_SET, dict);
- gf_cmd_log ("volume "GEOREP, " %s command on %s,%s %s ", operation,
- master, slave, (ret != 0)? "FAILED" : "SUCCEEDED");
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
if (ret) {
- if (dict)
- dict_unref (dict);
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
+ dict, err_str);
}
return ret;
}
+int
+glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_sys_exec);
+}
+
+int
+glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_copy_file);
+}
+
+int
+glusterd_handle_gsync_set (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_gsync_set);
+}
/*****
*
@@ -180,7 +372,7 @@ glusterd_urltransform_add (runner_t *runner, const char *url)
runner_add_arg (runner, url);
}
-static void
+static int
_glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *data)
{
runner_t *runner = (runner_t *)data;
@@ -190,6 +382,8 @@ _glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *d
GF_ASSERT (slave);
slave++;
runner_add_arg (runner, slave);
+
+ return 0;
}
static void
@@ -228,6 +422,7 @@ glusterd_urltransform (runner_t *runner, char ***linearrp)
arr_idx = 0;
for (;;) {
+ size_t len;
line = GF_MALLOC (1024, gf_gld_mt_linebuf);
if (!line) {
error = _gf_true;
@@ -238,21 +433,24 @@ glusterd_urltransform (runner_t *runner, char ***linearrp)
NULL)
break;
- if (line[strlen (line) - 1] != '\n') {
+ len = strlen (line);
+ if (len == 0 || line[len - 1] != '\n') {
GF_FREE (line);
error = _gf_true;
goto out;
}
- line[strlen (line) - 1] = '\0';
+ line[len - 1] = '\0';
if (arr_idx == arr_len) {
+ void *p = linearr;
arr_len <<= 1;
- linearr = GF_REALLOC (linearr, arr_len);
- if (!linearr) {
+ p = GF_REALLOC (linearr, arr_len);
+ if (!p) {
GF_FREE (line);
error = _gf_true;
goto out;
}
+ linearr = p;
}
linearr[arr_idx] = line;
@@ -300,7 +498,7 @@ struct dictidxmark {
char *ikey;
};
-static void
+static int
_dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
{
struct dictidxmark *dim = data;
@@ -309,6 +507,7 @@ _dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
dim->ikey = key;
dim->ithis++;
+ return 0;
}
static char *
@@ -354,9 +553,9 @@ glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavek
static int
-glusterd_query_extutil (char *resbuf, runner_t *runner)
+glusterd_query_extutil_generic (char *resbuf, size_t blen, runner_t *runner, void *data,
+ int (*fcbk)(char *resbuf, size_t blen, FILE *fp, void *data))
{
- char *ptr = NULL;
int ret = 0;
runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
@@ -366,92 +565,117 @@ glusterd_query_extutil (char *resbuf, runner_t *runner)
return -1;
}
- ptr = fgets(resbuf, PATH_MAX, runner_chio (runner, STDOUT_FILENO));
- if (ptr)
- resbuf[strlen(resbuf)-1] = '\0'; //strip off \n
+ ret = fcbk (resbuf, blen, runner_chio (runner, STDOUT_FILENO), data);
- ret = runner_end (runner);
+ ret |= runner_end (runner);
if (ret)
gf_log ("", GF_LOG_ERROR, "reading data from child failed");
return ret ? -1 : 0;
}
-int
-glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master,
- char *slave, char *gl_workdir)
+static int
+_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
{
- runner_t runner = {0,};
+ char *ptr = NULL;
+
+ errno = 0;
+ ptr = fgets (resbuf, blen, fp);
+ if (ptr) {
+ size_t len = strlen(resbuf);
+ if (len && resbuf[len-1] == '\n')
+ resbuf[len-1] = '\0'; //strip off \n
+ }
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, ":%s", master);
- runner_add_args (&runner, slave, "--config-get", NULL);
- runner_argprintf (&runner, "%s-file", param);
+ return errno ? -1 : 0;
+}
- return glusterd_query_extutil (prmfile, &runner);
+static int
+glusterd_query_extutil (char *resbuf, runner_t *runner)
+{
+ return glusterd_query_extutil_generic (resbuf, PATH_MAX, runner, NULL,
+ _fcbk_singleline);
}
-int
-glusterd_gsync_get_session_owner (char *master, char *slave, char *session_owner,
- char *gl_workdir)
+static int
+_fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data)
{
- runner_t runner = {0,};
+ char *ptr = NULL;
+ dict_t *dict = data;
+ char *v = NULL;
- runinit(&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, ":%s", master);
- runner_add_args (&runner, slave, "--config-get", "session-owner",
- NULL);
+ for (;;) {
+ errno = 0;
+ ptr = fgets (resbuf, blen, fp);
+ if (!ptr)
+ break;
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace (*v))
+ /* strip trailing space */
+ *v-- = '\0';
+ if (v == resbuf)
+ /* skip empty line */
+ continue;
+ v = strchr (resbuf, ':');
+ if (!v)
+ return -1;
+ *v++ = '\0';
+ while (isspace (*v))
+ v++;
+ v = gf_strdup (v);
+ if (!v)
+ return -1;
+ if (dict_set_dynstr (dict, resbuf, v) != 0) {
+ GF_FREE (v);
+ return -1;
+ }
+ }
- return glusterd_query_extutil (session_owner, &runner);
+ return errno ? -1 : 0;
}
-int
-glusterd_gsync_get_slave_log_file (char *master, char *slave, char *log_file)
+static int
+glusterd_gsync_get_config (char *master, char *slave, char *conf_path, dict_t *dict)
{
- int ret = -1;
- runner_t runner = {0,};
- char uuid_str[64] = {0,};
- glusterd_conf_t *priv = NULL;
- char *gl_workdir = NULL;
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {0,};
+ runner_t runner = {0,};
- GF_ASSERT(THIS);
- GF_ASSERT(THIS->private);
-
- priv = THIS->private;
-
- GF_VALIDATE_OR_GOTO("gsyncd", master, out);
- GF_VALIDATE_OR_GOTO("gsyncd", slave, out);
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--config-get-all", NULL);
- gl_workdir = priv->workdir;
+ return glusterd_query_extutil_generic (resbuf, sizeof (resbuf),
+ &runner, dict, _fcbk_conftodict);
+}
- /* get the session owner for the master-slave session */
- ret = glusterd_gsync_get_session_owner (master, slave, uuid_str,
- gl_workdir);
- if (ret)
- goto out;
+static int
+glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master,
+ char *slave, char *conf_path)
+{
+ runner_t runner = {0,};
- /* get the log file for the slave */
- runinit(&runner);
+ runinit (&runner);
runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, "--session-owner=%s", uuid_str);
- runner_add_args (&runner, slave, "--config-get", "log-file", NULL);
-
- ret = glusterd_query_extutil (log_file, &runner);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--config-get", NULL);
+ runner_argprintf (&runner, "%s-file", param);
- out:
- return ret;
+ return glusterd_query_extutil (prmfile, &runner);
}
static int
-gsyncd_getpidfile (char *master, char *slave, char *pidfile)
+gsyncd_getpidfile (char *master, char *slave, char *pidfile, char *conf_path)
{
int ret = -1;
glusterd_conf_t *priv = NULL;
+ char *confpath = NULL;
+ char conf_buf[PATH_MAX] = "";
+ struct stat stbuf = {0,};
+
GF_ASSERT (THIS);
GF_ASSERT (THIS->private);
@@ -461,8 +685,22 @@ gsyncd_getpidfile (char *master, char *slave, char *pidfile)
GF_VALIDATE_OR_GOTO ("gsync", master, out);
GF_VALIDATE_OR_GOTO ("gsync", slave, out);
+ ret = lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_DEBUG, "Using passed config template(%s).",
+ conf_path);
+ confpath = conf_path;
+ } else {
+ ret = snprintf (conf_buf, sizeof(conf_buf) - 1,
+ "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+ conf_buf[ret] = '\0';
+ confpath = conf_buf;
+ gf_log ("", GF_LOG_DEBUG, "Using default config template(%s).",
+ confpath);
+ }
+
ret = glusterd_gsync_get_param_file (pidfile, "pid", master,
- slave, priv->workdir);
+ slave, confpath);
if (ret == -1) {
ret = -2;
gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string");
@@ -476,32 +714,6 @@ gsyncd_getpidfile (char *master, char *slave, char *pidfile)
}
static int
-glusterd_gsyncd_getlogfile (char *master, char *slave, char *log_file)
-{
- int ret = -1;
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
-
- priv = THIS->private;
-
- GF_VALIDATE_OR_GOTO ("gsync", master, out);
- GF_VALIDATE_OR_GOTO ("gsync", slave, out);
-
- ret = glusterd_gsync_get_param_file (log_file, "log", master,
- slave, priv->workdir);
- if (ret == -1) {
- ret = -2;
- gf_log ("", GF_LOG_WARNING, "failed to gsyncd logfile");
- goto out;
- }
-
- out:
- return ret;
-}
-
-static int
gsync_status_byfd (int fd)
{
GF_ASSERT (fd >= -1);
@@ -518,23 +730,24 @@ gsync_status_byfd (int fd)
* return -1 when not running
*/
int
-gsync_status (char *master, char *slave, int *status)
+gsync_status (char *master, char *slave, char *conf_path, int *status)
{
char pidfile[PATH_MAX] = {0,};
int fd = -1;
- fd = gsyncd_getpidfile (master, slave, pidfile);
+ fd = gsyncd_getpidfile (master, slave, pidfile, conf_path);
if (fd == -2)
return -1;
*status = gsync_status_byfd (fd);
- close (fd);
+ sys_close (fd);
+
return 0;
}
-int32_t
+static int32_t
glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo,
char *key, char *value)
{
@@ -558,17 +771,49 @@ out:
return 0;
}
-int
-gsync_verify_config_options (dict_t *dict, char **op_errstr)
+static int
+glusterd_verify_gsyncd_spawn (char *master, char *slave)
+{
+ int ret = 0;
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ "--verify", "spawning", NULL);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "spawning child failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (runner_end (&runner) != 0)
+ ret = -1;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+static int
+gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
{
- char **resopt = NULL;
- int i = 0;
- char *subop = NULL;
- char *slave = NULL;
- char *op_name = NULL;
- char *op_value = NULL;
- char *t = NULL;
- gf_boolean_t banned = _gf_true;
+ char **resopt = NULL;
+ int i = 0;
+ int ret = -1;
+ char *subop = NULL;
+ char *slave = NULL;
+ char *op_name = NULL;
+ char *op_value = NULL;
+ char *t = NULL;
+ char errmsg[PATH_MAX] = "";
+ gf_boolean_t banned = _gf_true;
+ gf_boolean_t op_match = _gf_true;
+ gf_boolean_t val_match = _gf_true;
+ struct gsync_config_opt_vals_ *conf_vals = NULL;
if (dict_get_str (dict, "subop", &subop) != 0) {
gf_log ("", GF_LOG_WARNING, "missing subop");
@@ -592,6 +837,12 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr)
}
if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) {
+ ret = glusterd_verify_gsyncd_spawn (volname, slave);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to spawn gsyncd");
+ return 0;
+ }
+
gf_log ("", GF_LOG_WARNING, "Invalid option %s", op_name);
*op_errstr = gf_strdup ("Invalid option");
@@ -636,34 +887,117 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr)
}
}
+ /* Check options in gsync_confopt_vals for invalid values */
+ for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) {
+ op_match = _gf_true;
+ for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) {
+ if (conf_vals->op_name[i] == op_name[i] ||
+ (conf_vals->op_name[i] == '_' && op_name[i] == '-'))
+ continue;
+ op_match = _gf_false;
+ }
+
+ if (op_match) {
+ if (!op_value)
+ goto out;
+ val_match = _gf_false;
+ for (i = 0; i < conf_vals->no_of_pos_vals; i++) {
+ if(conf_vals->case_sensitive){
+ if (!strcmp (conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ } else {
+ if (!strcasecmp (conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ }
+ }
+
+ if (!val_match) {
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Invalid value(%s) for"
+ " option %s", op_value,
+ op_name);
+ errmsg[ret] = '\0';
+
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ return -1;
+ }
+ }
+ }
+out:
return 0;
}
-static void
+static int
+glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ dict_t *rsp_dict, char *node);
+
+static int
_get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data)
{
glusterd_gsync_status_temp_t *param = NULL;
char *slave = NULL;
- int ret = 0;
+ char *slave_buf = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
param = (glusterd_gsync_status_temp_t *)data;
GF_ASSERT (param);
GF_ASSERT (param->volinfo);
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
slave = strchr(value->data, ':');
- if (slave)
- slave ++;
- else
- return;
+ if (!slave)
+ return 0;
+ slave++;
+
+ ret = glusterd_get_slave_info (slave, &slave_ip, &slave_vol, &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch "
+ "slave details. Error: %s", errmsg);
+ else
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, param->volinfo->volname,
+ slave_ip, slave_vol);
+ conf_path[ret] = '\0';
ret = glusterd_get_gsync_status_mst_slv(param->volinfo,
- slave, param->rsp_dict);
+ slave, conf_path,
+ param->rsp_dict,
+ param->node);
+out:
+
+ GF_FREE (errmsg);
+
+ if (slave_buf)
+ GF_FREE(slave_buf);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
}
-static void
+static int
_get_max_gsync_slave_num (dict_t *this, char *key, data_t *value, void *data)
{
int tmp_slvnum = 0;
@@ -672,25 +1006,30 @@ _get_max_gsync_slave_num (dict_t *this, char *key, data_t *value, void *data)
sscanf (key, "slave%d", &tmp_slvnum);
if (tmp_slvnum > *slvnum)
*slvnum = tmp_slvnum;
+
+ return 0;
}
static int
glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
char **op_errstr)
{
+ int zero_slave_entries = _gf_true;
int ret = 0;
char *slavekey = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (slave);
- ret = glusterd_get_slave (volinfo, slave, &slavekey);
- if (ret < 0) {
- ret++;
- goto out;
- }
-
- dict_del (volinfo->gsync_slaves, slavekey);
+ do {
+ ret = glusterd_get_slave (volinfo, slave, &slavekey);
+ if (ret < 0 && zero_slave_entries) {
+ ret++;
+ goto out;
+ }
+ zero_slave_entries = _gf_false;
+ dict_del (volinfo->gsync_slaves, slavekey);
+ } while (ret >= 0);
ret = glusterd_store_volinfo (volinfo,
GLUSTERD_VOLINFO_VER_AC_INCREMENT);
@@ -743,8 +1082,9 @@ glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol,
return ret;
}
-static int
+int
glusterd_check_gsync_running_local (char *master, char *slave,
+ char *conf_path,
gf_boolean_t *is_run)
{
int ret = -1;
@@ -755,7 +1095,7 @@ glusterd_check_gsync_running_local (char *master, char *slave,
GF_ASSERT (is_run);
*is_run = _gf_false;
- ret = gsync_status (master, slave, &ret_status);
+ ret = gsync_status (master, slave, conf_path, &ret_status);
if (ret == 0 && ret_status == 0) {
*is_run = _gf_true;
} else if (ret == -1) {
@@ -772,7 +1112,8 @@ glusterd_check_gsync_running_local (char *master, char *slave,
static int
glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
- char *host_uuid, char **op_errstr)
+ char *host_uuid, char **op_errstr,
+ gf_boolean_t is_force)
{
int ret = 0;
int maxslv = 0;
@@ -795,7 +1136,8 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
case -1:
break;
default:
- GF_ASSERT (ret > 0);
+ if (!is_force)
+ GF_ASSERT (ret > 0);
ret = dict_get_str (volinfo->gsync_slaves, slavekey, &slaveentry);
GF_ASSERT (ret == 0);
@@ -804,13 +1146,23 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
* assert an uuid mismatch
*/
t = strtail (slaveentry, host_uuid);
- GF_ASSERT (!t || *t != ':')
+ if (!is_force)
+ GF_ASSERT (!t || *t != ':');
+
+ if (is_force) {
+ gf_log ("", GF_LOG_DEBUG, GEOREP" has already been "
+ "invoked for the %s (master) and %s (slave)."
+ " Allowing without saving info again due to"
+ " force command.", volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
gf_log ("", GF_LOG_ERROR, GEOREP" has already been invoked for "
"the %s (master) and %s (slave) "
"from a different machine",
volinfo->volname, slave);
- *op_errstr = gf_strdup (GEOREP" already running in an an"
+ *op_errstr = gf_strdup (GEOREP" already running in "
"another machine");
ret = -1;
goto out;
@@ -843,23 +1195,26 @@ glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
return ret;
}
-
static int
glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo,
- char *slave, char **op_errstr)
+ char *slave, char *conf_path,
+ char *statefile, char **op_errstr,
+ gf_boolean_t is_force)
{
int ret = -1;
gf_boolean_t is_running = _gf_false;
char msg[2048] = {0};
uuid_t uuid = {0};
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ struct stat stbuf = {0,};
this = THIS;
GF_ASSERT (volinfo);
GF_ASSERT (slave);
GF_ASSERT (op_errstr);
+ GF_ASSERT (conf_path);
GF_ASSERT (this && this->private);
priv = this->private;
@@ -869,26 +1224,56 @@ glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo,
"before "GEOREP" start", volinfo->volname);
goto out;
}
+
+ ret = lstat (statefile, &stbuf);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ /* Check if the gsync slave info is stored. If not
+ * session has not been created */
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ if (is_force) {
+ ret = 0;
+ goto out;
+ }
+
/*Check if the gsync is already started in cmd. inited host
* If so initiate add it into the glusterd's priv*/
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if ((ret == 0) && (uuid_compare (priv->uuid, uuid) == 0)) {
- ret = glusterd_check_gsync_running_local (volinfo->volname,
- slave, &is_running);
- if (ret) {
- snprintf (msg, sizeof (msg), GEOREP" start option "
- "validation failed ");
- goto out;
- }
- if (_gf_true == is_running) {
- snprintf (msg, sizeof (msg), GEOREP " session between"
- " %s & %s already started", volinfo->volname,
- slave);
- ret = -1;
- goto out;
- }
+ ret = glusterd_check_gsync_running_local (volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (ret) {
+ snprintf (msg, sizeof (msg), GEOREP" start option "
+ "validation failed ");
+ goto out;
+ }
+ if (_gf_true == is_running) {
+ snprintf (msg, sizeof (msg), GEOREP " session between"
+ " %s & %s already started", volinfo->volname,
+ slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to spawn gsyncd");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
}
- ret = 0;
out:
if (ret && (msg[0] != '\0')) {
*op_errstr = gf_strdup (msg);
@@ -912,13 +1297,168 @@ glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag)
return 0;
}
+/*
+ * is_geo_rep_active:
+ * This function reads the state_file and sets is_active to 1 if the
+ * monitor status is neither "Stopped" or "Not Started"
+ *
+ * RETURN VALUE:
+ * 0: On successful read of state_file.
+ * -1: error.
+ */
+
+static int
+is_geo_rep_active (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, int *is_active)
+{
+ dict_t *confd = NULL;
+ char *statefile = NULL;
+ char *master = NULL;
+ char monitor_status[PATH_MAX] = "";
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ master = volinfo->volname;
+
+ confd = dict_new ();
+ if (!confd) {
+ gf_log ("", GF_LOG_ERROR, "Not able to create dict.");
+ goto out;
+ }
+
+ ret = glusterd_gsync_get_config (master, slave, conf_path,
+ confd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get configuration data "
+ "for %s(master), %s(slave)", master, slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_param (confd, "state_file", &statefile);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name "
+ "for %s(master), %s(slave). Please check gsync "
+ "config file.", master, slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+ sizeof (monitor_status));
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read the status "
+ "file for %s(master), %s(slave)", master, slave);
+ strncpy (monitor_status, "defunct", sizeof (monitor_status));
+ }
+
+ if ((!strcmp(monitor_status, "Stopped")) ||
+ (!strcmp(monitor_status, "Not Started"))) {
+ *is_active = 0;
+ } else {
+ *is_active = 1;
+ }
+ ret = 0;
+out:
+ if (confd)
+ dict_destroy (confd);
+ return ret;
+}
+
+/*
+ * _get_slave_status:
+ * Called for each slave in the volume from dict_foreach.
+ * It calls is_geo_rep_active to get the monitor status.
+ *
+ * RETURN VALUE:
+ * 0: On successful read of state_file from is_geo_rep_active.
+ * When it is found geo-rep is already active from previous calls.
+ * When there is no slave.
+ * -1: On error.
+ */
+
+int
+_get_slave_status (dict_t *dict, char *key, data_t *value, void *data)
+{
+ gsync_status_param_t *param = NULL;
+ char *slave = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ param = (gsync_status_param_t *)data;
+
+ GF_ASSERT (param);
+ GF_ASSERT (param->volinfo);
+
+ if (param->is_active) {
+ ret = 0;
+ goto out;
+ }
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (this)
+ priv = this->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ slave = strchr(value->data, ':');
+ if (!slave) {
+ ret = 0;
+ goto out;
+ }
+ slave++;
+
+ ret = glusterd_get_slave_info (slave, &slave_ip, &slave_vol, &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch "
+ "slave details. Error: %s", errmsg);
+ else
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, param->volinfo->volname,
+ slave_ip, slave_vol);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to assign conf_path.");
+ ret = -1;
+ goto out;
+ }
+ conf_path[ret] = '\0';
+
+ ret = is_geo_rep_active (param->volinfo,slave, conf_path,
+ &param->is_active);
+out:
+ GF_FREE(errmsg);
+ return ret;
+}
+
static int
glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo,
- char *slave, char **op_errstr)
+ char *slave, char *conf_path,
+ char **op_errstr)
{
- int ret = -1;
- char msg[2048] = {0};
- uuid_t uuid = {0};
+ int pfd = -1;
+ int ret = -1;
+ char msg[2048] = {0};
+ char pidfile[PATH_MAX] = {0,};
GF_ASSERT (THIS && THIS->private);
GF_ASSERT (volinfo);
@@ -931,13 +1471,26 @@ glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo,
goto out;
}
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if (ret == -1) {
- snprintf (msg, sizeof (msg), GEOREP" session between %s & %s"
- " not active", volinfo->volname, slave);
+
+ pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, conf_path);
+ if (pfd == -2) {
+ gf_log ("", GF_LOG_ERROR, GEOREP" stop validation "
+ "failed for %s & %s", volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ if (gsync_status_byfd (pfd) == -1) {
+ snprintf (msg, sizeof (msg), GEOREP" session b/w %s & %s is not"
+ " running on this node.", volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ /* monitor gsyncd already dead */
goto out;
}
+ if (pfd < 0)
+ goto out;
+
ret = 0;
out:
if (ret && (msg[0] != '\0')) {
@@ -956,6 +1509,18 @@ glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr)
gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
int ret = 0;
+ char *conf_path = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
ret = dict_get_str (dict, "master", &volname);
if (ret < 0) {
@@ -980,38 +1545,191 @@ glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr)
goto out;
}
- out:
+ ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip,
+ &slave_vol, &conf_path,
+ op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
-
}
-static int
+int
glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
- char **master, char **slave)
+ char **master, char **slave, char **host_uuid)
{
int ret = -1;
GF_ASSERT (dict);
GF_ASSERT (op_errstr);
- GF_ASSERT (master);
- GF_ASSERT (slave);
- ret = dict_get_str (dict, "master", master);
- if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "master not found");
- *op_errstr = gf_strdup ("master not found");
+ if (master) {
+ ret = dict_get_str (dict, "master", master);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "master not found");
+ *op_errstr = gf_strdup ("master not found");
+ goto out;
+ }
+ }
+
+ if (slave) {
+ ret = dict_get_str (dict, "slave", slave);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "slave not found");
+ *op_errstr = gf_strdup ("slave not found");
+ goto out;
+ }
+ }
+
+ if (host_uuid) {
+ ret = dict_get_str (dict, "host-uuid", host_uuid);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "host_uuid not found");
+ *op_errstr = gf_strdup ("host_uuid not found");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr)
+{
+ char errmsg[PATH_MAX] = "";
+ char *command = NULL;
+ char command_path[PATH_MAX] = "";
+ struct stat st = {0,};
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (conf->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ strcpy (errmsg, "internal error");
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ /* enforce local occurrence of the command */
+ if (strchr (command, '/')) {
+ strcpy (errmsg, "invalid command name");
+ ret = -1;
+ goto out;
+ }
+
+ sprintf (command_path, GSYNCD_PREFIX"/peer_%s", command);
+ /* check if it's executable */
+ ret = access (command_path, X_OK);
+ if (!ret)
+ /* check if it's a regular file */
+ ret = stat (command_path, &st);
+ if (!ret && !S_ISREG (st.st_mode))
+ ret = -1;
+
+out:
+ if (ret) {
+ if (errmsg[0] == '\0')
+ snprintf (errmsg, sizeof (errmsg), "%s not found.",
+ command);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr)
+{
+ char abs_filename[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *filename = NULL;
+ char *host_uuid = NULL;
+ char uuid_str [64] = {0};
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {0,};
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
goto out;
}
- ret = dict_get_str (dict, "slave", slave);
+ if (priv->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "host-uuid", &host_uuid);
if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "slave not found");
- *op_errstr = gf_strdup ("slave not found");
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch"
+ " host-uuid from dict.");
goto out;
}
+ uuid_utoa_r (MY_UUID, uuid_str);
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = dict_get_str (dict, "source", &filename);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch"
+ " filename from dict.");
+ *op_errstr = gf_strdup ("command unsuccessful");
+ goto out;
+ }
+ snprintf (abs_filename, sizeof(abs_filename),
+ "%s/%s", priv->workdir, filename);
+
+ ret = lstat (abs_filename, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Source file"
+ " does not exist in %s", priv->workdir);
+ *op_errstr = gf_strdup (errmsg);
+ goto out;
+ }
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ snprintf (errmsg, sizeof (errmsg), "Source file"
+ " is not a regular file.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
ret = 0;
out:
@@ -1020,17 +1738,513 @@ out:
}
int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile)
+{
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ char *master = NULL;
+ char *buf = NULL;
+ dict_t *confd = NULL;
+ char *confpath = NULL;
+ char conf_buf[PATH_MAX] = "";
+ struct stat stbuf = {0,};
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+ GF_ASSERT (volinfo);
+
+ master = volinfo->volname;
+
+ confd = dict_new ();
+ if (!confd) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create new dict");
+ goto out;
+ }
+
+ priv = THIS->private;
+
+ ret = lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_INFO, "Using passed config template(%s).",
+ conf_path);
+ confpath = conf_path;
+ } else {
+ ret = snprintf (conf_buf, sizeof(conf_buf) - 1,
+ "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+ conf_buf[ret] = '\0';
+ confpath = conf_buf;
+ gf_log ("", GF_LOG_INFO, "Using default config template(%s).",
+ confpath);
+ }
+
+ ret = glusterd_gsync_get_config (master, slave, confpath,
+ confd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"
+ "for %s(master), %s(slave)", master, slave);
+ goto out;
+
+ }
+
+ ret = dict_get_param (confd, "state_file", &buf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name.");
+ goto out;
+ }
+
+ *statefile = gf_strdup(buf);
+ if (!*statefile) {
+ gf_log ("", GF_LOG_ERROR, "Unable to gf_strdup.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+ out:
+ if (confd)
+ dict_destroy (confd);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret);
+ return ret;
+}
+
+static int
+glusterd_create_status_file (char *master, char *slave, char *slave_ip,
+ char *slave_vol, char *status)
+{
+ int ret = -1;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ if (!status) {
+ gf_log ("", GF_LOG_ERROR, "Status Empty");
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "slave = %s", slave);
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--create",
+ status, "-c", NULL);
+ runner_argprintf (&runner, "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, master, slave_ip, slave_vol);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, NULL);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Creating status file failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_verify_slave (char *volname, char *slave_ip, char *slave,
+ char **op_errstr, gf_boolean_t *is_force_blocker)
+{
+ int32_t ret = -1;
+ runner_t runner = {0,};
+ char log_file_path[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volname);
+ GF_ASSERT (slave_ip);
+ GF_ASSERT (slave);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ snprintf (log_file_path, sizeof(log_file_path),
+ DEFAULT_LOG_FILE_DIRECTORY"/create_verify_log");
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gverify.sh", NULL);
+ runner_argprintf (&runner, "%s", volname);
+ runner_argprintf (&runner, "%s", slave_ip);
+ runner_argprintf (&runner, "%s", slave);
+ runner_argprintf (&runner, "%s", log_file_path);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Not a valid slave");
+ ret = glusterd_gsync_read_frm_status (log_file_path,
+ buf, sizeof(buf));
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read from %s",
+ log_file_path);
+ goto out;
+ }
+
+ /* Tokenize the error message from gverify.sh to figure out
+ * if the error is a force blocker or not. */
+ tmp = strtok_r (buf, "|", &save_ptr);
+ if (!strcmp (tmp, "FORCE_BLOCKER"))
+ *is_force_blocker = 1;
+ else {
+ /* No FORCE_BLOCKER flag present so all that is
+ * present is the error message. */
+ *is_force_blocker = 0;
+ if (tmp)
+ *op_errstr = gf_strdup (tmp);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copy rest of the error message to op_errstr */
+ tmp = strtok_r (NULL, "|", &save_ptr);
+ if (tmp)
+ *op_errstr = gf_strdup (tmp);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ unlink (log_file_path);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mountbroker_check (char **slave_ip, char **op_errstr)
+{
+ int ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *username = NULL;
+ char *host = NULL;
+ char errmsg[PATH_MAX] = "";
+
+ GF_ASSERT (slave_ip);
+ GF_ASSERT (*slave_ip);
+
+ /* Checking if hostname has user specified */
+ host = strstr (*slave_ip, "@");
+ if (!host) {
+ gf_log ("", GF_LOG_DEBUG, "No username provided.");
+ ret = 0;
+ goto out;
+ } else {
+ /* Moving the host past the '@' and checking if the
+ * actual hostname also has '@' */
+ host++;
+ if (strstr (host, "@")) {
+ gf_log ("", GF_LOG_DEBUG, "host = %s", host);
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Invalid Hostname (%s).", host);
+ errmsg[ret] = '\0';
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = gf_strdup (errmsg);
+ goto out;
+ }
+
+ /* Fetching the username and hostname
+ * and checking if the username is non-root */
+ username = strtok_r (*slave_ip, "@", &save_ptr);
+ tmp = strtok_r (NULL, "@", &save_ptr);
+ if (strcmp (username, "root")) {
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Non-root username (%s@%s) not allowed.",
+ username, tmp);
+ errmsg[ret] = '\0';
+ if (op_errstr)
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR,
+ "Non-Root username not allowed.");
+ ret = -1;
+ goto out;
+ }
+
+ *slave_ip = gf_strdup (tmp);
+ if (!*slave_ip) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr)
+{
+ char *down_peerstr = NULL;
+ char *slave = NULL;
+ char *volname = NULL;
+ char *host_uuid = NULL;
+ char *statefile = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *conf_path = NULL;
+ char errmsg[PATH_MAX] = "";
+ char common_pem_file[PATH_MAX] = "";
+ char hook_script[PATH_MAX] = "";
+ char uuid_str [64] = "";
+ int ret = -1;
+ int is_pem_push = -1;
+ gf_boolean_t is_force = -1;
+ gf_boolean_t is_force_blocker = -1;
+ gf_boolean_t exists = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ struct stat stbuf = {0,};
+ xlator_t *this = NULL;