summaryrefslogtreecommitdiffstats
path: root/xlators/debug
Commit message (Expand)AuthorAgeFilesLines
* build: out-of-tree builds generates files in the wrong directoryKaleb S KEITHLEY2016-09-183-4/+6
* io-stats: Add stats for upcall notificationsv3.10devPoornima G2016-08-311-0/+87
* core: fix unused variable warnings/errorsKaleb S. KEITHLEY2016-08-291-3/+2
* io-stats: Add ipc fop for display in the profile infoPoornima G2016-08-291-0/+20
* logging: Avoid re-initing log level in io-statsVijay Bellur2016-08-231-1/+2
* libglusterfs: move alloca0 definition to common-utilsRavishankar N2016-08-121-2/+1
* io-stats: fix translator namesJeff Darcy2016-07-291-4/+26
* build: correctly format some (s)size_t messagesNiels de Vos2016-07-171-1/+1
* io-stats: Fix io-stat dump to dump at all levelsShyam2016-06-151-13/+22
* io-stats: Fix overwriting of client profile by the bricksPoornima G2016-04-121-24/+43
* debug/trace: Print {acm}times as integersKrutika Dhananjay2016-03-291-13/+13
* debug/io-stats: Disable fop stats dump by defaultAnoop C S2016-03-241-1/+1
* all: fixes for clang compile warningsKaleb S KEITHLEY2016-02-151-1/+1
* all: fix various cppcheck warningsKaleb S KEITHLEY2016-02-151-5/+8
* build: export minimum symbols from xlators for correct resolutionKaleb S KEITHLEY2015-12-223-3/+3
* core: use syscall wrappers instead of direct syscalls - tailKaleb S KEITHLEY2015-11-161-2/+3
* io-stats: fix BSD build failureAtin Mukherjee2015-11-021-1/+1
* debug/io-stats: Add FOP sampling featureRichard Wareing2015-11-012-28/+468
* xlators: add JSON FOP statistics dumps every N secondsRichard Wareing2015-10-083-105/+478
* build: export minimum symbols from xlators for correct resolutionKaleb S. KEITHLEY2015-09-243-3/+3
* all: reduce "inline" usageJeff Darcy2015-09-012-3/+3
* debug/trace: Print total write size in trace_writevKrutika Dhananjay2015-08-311-2/+7
* build: do not #include "config.h" in each fileNiels de Vos2015-05-294-19/+0
* io-stats: Fixing dereference after null check.arao2015-04-101-1/+2
* Avoid conflict between contrib/uuid and system uuidEmmanuel Dreyfus2015-04-041-4/+4
* io-stats : null dereference coverity fix.Manikandan Selvaganesh2015-03-301-5/+7
* core: use gf_time_fmt() instead of localtime()+strftime()Kaleb S. KEITHLEY2014-11-201-37/+22
* xlator/io-stat: Check and copy loc->pathShyam2014-11-051-3/+9
* io-stats: Adding private data dumping methodVipul Nayyar2014-09-031-3/+74
* build: make GLUSTERD_WORKDIR rely on localstatedirHarshavardhana2014-08-071-2/+2
* debug/io-stats: Free conf on init failuresPranith Kumar K2014-06-301-15/+23
* libglusterfs: Don't allow '-0' as input value for numbersPranith Kumar K2014-06-221-0/+3
* logging: Introduce suppression of repetitive log messagesKrutika Dhananjay2014-04-301-0/+65
* debug/trace: Fix length issue found in compilationPranith Kumar K2014-04-291-44/+45
* build: MacOSX Porting fixesHarshavardhana2014-04-243-38/+39
* logging: Make logger and log format configurable through cliKrutika Dhananjay2014-04-111-0/+67
* debug/io-stats:fix compile warningRavishankar N2014-03-221-1/+1
* cli: Add options to the CLI that let the user control the reset ofDawit Alemu2014-01-241-21/+84
* cli: Add an option to fetch just incremental or cumulative I/0Dawit Alemu2013-12-032-19/+40
* zerofill: Change the type of len argument of glfs_zerofill() to off_tBharata B Rao2013-11-141-1/+1
* glusterfs: zerofill supportM. Mohan Kumar2013-11-101-0/+23
* cluster/afr : Implementation of command "gluster volume heal vn statistics"Venkatesh Somyajulu2013-10-141-1/+1
* glusterfs: discard (hole punch) supportBrian Foster2013-06-131-0/+27
* gluster: add fallocate fop supportBrian Foster2013-06-131-0/+26
* debug/error-gen: Add support for reconfiguring options.Vijay Bellur2013-02-261-66/+102
* features: add a directory-protection translatorJeff Darcy2013-02-171-1/+1
* Debug/io-stats: Enable INFO level in syslogRaghavendra Talur2013-02-111-1/+1
* Use proper libtool option -avoid-version instead of bogus -avoidversionAnand Avati2013-02-073-3/+3
* debug/io-stats: Handle open failures gracefullyPranith Kumar K2013-01-231-1/+0
* core: fixes for gcc's '-pedantic' flag buildAvra Sengupta2013-01-211-2/+1
th: 2.7%;'/> -rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c546
-rw-r--r--xlators/cluster/dht/src/nufa.c341
-rw-r--r--xlators/cluster/dht/src/switch.c234
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am7
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c97
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h3
-rw-r--r--xlators/cluster/stripe/src/stripe.c1055
-rw-r--r--xlators/cluster/stripe/src/stripe.h17
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c260
-rw-r--r--xlators/debug/error-gen/src/error-gen.h36
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c132
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c2895
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c43
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from xlators/protocol/legacy/lib/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am1
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from xlators/protocol/legacy/server/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/src/Makefile.am8
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c405
-rw-r--r--xlators/features/index/src/index.h37
-rw-r--r--xlators/features/locks/src/Makefile.am9
-rw-r--r--xlators/features/locks/src/clear.c32
-rw-r--r--xlators/features/locks/src/clear.h22
-rw-r--r--xlators/features/locks/src/common.c213
-rw-r--r--xlators/features/locks/src/common.h81
-rw-r--r--xlators/features/locks/src/entrylk.c173
-rw-r--r--xlators/features/locks/src/inodelk.c245
-rw-r--r--xlators/features/locks/src/locks-mem-types.h20
-rw-r--r--xlators/features/locks/src/locks.h56
-rw-r--r--xlators/features/locks/src/posix.c769
-rw-r--r--xlators/features/locks/src/reservelk.c24
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c27
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h20
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c37
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h25
-rw-r--r--xlators/features/marker/src/marker-quota.c30
-rw-r--r--xlators/features/marker/src/marker-quota.h25
-rw-r--r--xlators/features/marker/src/marker.c298
-rw-r--r--xlators/features/marker/src/marker.h31
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c345
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c131
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h30
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/README.md81
-rw-r--r--xlators/features/marker/utils/syncdaemon/__codecheck.py46
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py20
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py400
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py886
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py129
-rw-r--r--xlators/features/marker/utils/syncdaemon/repce.py225
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py945
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py282
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c27
-rw-r--r--xlators/features/protect/Makefile.am (renamed from xlators/protocol/legacy/client/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c47
-rw-r--r--xlators/features/quiesce/src/quiesce.h20
-rw-r--r--xlators/features/quota/src/Makefile.am7
-rw-r--r--xlators/features/quota/src/quota-mem-types.h20
-rw-r--r--xlators/features/quota/src/quota.c238
-rw-r--r--xlators/features/quota/src/quota.h36
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c24
-rw-r--r--xlators/features/read-only/src/read-only-common.h24
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c25
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h20
-rw-r--r--xlators/features/trash/src/trash.c39
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c195
-rw-r--r--xlators/lib/src/libxlator.h112
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c850
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c3185
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2326
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1009
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h30
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c105
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c3162
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h77
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c140
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h22
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c96
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c410
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c795
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1228
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c103
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h59
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2689
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h158
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1595
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c4740
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h278
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1107
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h115
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c1069
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c721
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h567
-rw-r--r--xlators/mount/fuse/src/Makefile.am7
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c1903
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h259
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c193
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h21
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c233
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in231
-rw-r--r--xlators/nfs/server/src/Makefile.am17
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c994
-rw-r--r--xlators/nfs/server/src/mount3.h39
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c26
-rw-r--r--xlators/nfs/server/src/nfs-common.c69
-rw-r--r--xlators/nfs/server/src/nfs-common.h20
-rw-r--r--xlators/nfs/server/src/nfs-fops.c251
-rw-r--r--xlators/nfs/server/src/nfs-fops.h28
-rw-r--r--xlators/nfs/server/src/nfs-generics.c33
-rw-r--r--xlators/nfs/server/src/nfs-generics.h27
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c27
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h17
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h20
-rw-r--r--xlators/nfs/server/src/nfs.c730
-rw-r--r--xlators/nfs/server/src/nfs.h57
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c39
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h32
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c175
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h23
-rw-r--r--xlators/nfs/server/src/nfs3.c570
-rw-r--r--xlators/nfs/server/src/nfs3.h90
-rw-r--r--xlators/nfs/server/src/nlm4.c374
-rw-r--r--xlators/nfs/server/src/nlm4.h22
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c27
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c207
-rw-r--r--xlators/performance/io-cache/src/io-cache.h2
-rw-r--r--xlators/performance/io-cache/src/page.c21
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c318
-rw-r--r--xlators/performance/io-threads/src/io-threads.h13
-rw-r--r--xlators/performance/md-cache/src/Makefile.am8
-rw-r--r--xlators/performance/md-cache/src/md-cache.c431
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h1
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3717
-rw-r--r--xlators/performance/quick-read/src/quick-read.h57
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c148
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c3
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h1
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3631
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am8
-rw-r--r--xlators/protocol/auth/addr/src/addr.c20
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c20
-rw-r--r--xlators/protocol/client/src/Makefile.am8
-rw-r--r--xlators/protocol/client/src/client-callback.c6
-rw-r--r--xlators/protocol/client/src/client-handshake.c289
-rw-r--r--xlators/protocol/client/src/client-helpers.c77
-rw-r--r--xlators/protocol/client/src/client-lk.c360
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c1310
-rw-r--r--xlators/protocol/client/src/client.c183
-rw-r--r--xlators/protocol/client/src/client.h56
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am26
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am17
-rw-r--r--xlators/protocol/server/src/authenticate.c90
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c169
-rw-r--r--xlators/protocol/server/src/server-helpers.c1186
-rw-r--r--xlators/protocol/server/src/server-helpers.h74
-rw-r--r--xlators/protocol/server/src/server-mem-types.h19
-rw-r--r--xlators/protocol/server/src/server-resolve.c54
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c2723
-rw-r--r--xlators/protocol/server/src/server.c767
-rw-r--r--xlators/protocol/server/src/server.h127
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c31
-rw-r--r--xlators/storage/posix/src/posix-handle.h20
-rw-r--r--xlators/storage/posix/src/posix-helpers.c562
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c1452
-rw-r--r--xlators/storage/posix/src/posix.h96
-rw-r--r--xlators/system/posix-acl/src/Makefile.am10
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h40
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c121
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h71
434 files changed, 90777 insertions, 66415 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26c..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 3310a2115..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -148,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -166,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -188,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -217,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 95db5dd96..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -7,11 +7,11 @@ afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
$(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
+afr_la_LDFLAGS = -module -avoid-version
afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-pump_la_LDFLAGS = -module -avoidversion
+pump_la_LDFLAGS = -module -avoid-version
pump_la_SOURCES = $(afr_common_source) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
@@ -21,9 +21,11 @@ noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
$(top_builddir)/glusterfsd/src/glusterfsd.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/rpc-lib/src -shared -nostartfiles $(GF_CFLAGS)
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6787dfc27..af01f2ef2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -49,10 +49,9 @@
#include "afr-self-heald.h"
#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
+#define AFR_STATISTICS_HISTORY_SIZE 50
int
afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
gf_boolean_t fail_conflict);
@@ -203,60 +202,86 @@ out:
return ret;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
- GF_ASSERT (child_count > 0);
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
}
- ret = 0;
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
}
+ UNLOCK (&inode->lock);
return ctx;
}
void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
int i = 0;
- uint64_t ctx_addr = 0;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
switch (params->op) {
@@ -275,12 +300,6 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
params->u.value = _gf_true;
break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK)
- params->u.value = _gf_true;
- ;
- break;
default:
GF_ASSERT (0);
break;
@@ -293,11 +312,16 @@ unlock:
gf_boolean_t
afr_is_split_brain (xlator_t *this, inode_t *inode)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
}
gf_boolean_t
@@ -306,11 +330,10 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)
afr_inode_params_t params = {0};
params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.value;
}
-
int32_t
afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
@@ -318,7 +341,7 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
params.op = AFR_INODE_GET_READ_CTX;
params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
+ afr_inode_get_ctx_params (this, inode, &params);
return params.u.read_ctx.read_child;
}
@@ -380,31 +403,14 @@ afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
}
void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
-
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- }
-}
-
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
{
GF_ASSERT (inode);
GF_ASSERT (params);
- int ret = 0;
afr_inode_ctx_t *ctx = NULL;
afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
int32_t *stale_children = NULL;
@@ -412,10 +418,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ ctx = __afr_inode_ctx_get (inode, this);
if (!ctx)
goto unlock;
switch (params->op) {
@@ -435,33 +438,26 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
case AFR_INODE_SET_OPENDIR_DONE:
afr_inode_ctx_set_opendir_done (ctx);
break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
default:
GF_ASSERT (0);
break;
}
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
}
unlock:
UNLOCK (&inode->lock);
}
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
{
- afr_inode_params_t params = {0};
+ afr_inode_ctx_t *ctx = NULL;
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
}
void
@@ -470,7 +466,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)
afr_inode_params_t params = {0};
params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
void
@@ -489,7 +485,7 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
params.op = AFR_INODE_SET_READ_CTX;
params.u.read_ctx.read_child = read_child;
params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
void
@@ -502,7 +498,7 @@ afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
params.op = AFR_INODE_RM_STALE_CHILDREN;
params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+ afr_inode_set_ctx_params (this, inode, &params);
}
gf_boolean_t
@@ -571,6 +567,7 @@ afr_hash_child (int32_t *success_children, int32_t child_count,
unsigned int hmode, uuid_t gfid)
{
uuid_t gfid_copy = {0,};
+ pid_t pid;
if (!hmode) {
return -1;
@@ -589,7 +586,8 @@ afr_hash_child (int32_t *success_children, int32_t child_count,
* perfection here. All we need is a low probability that
* multiple clients won't converge on the same subvolume.
*/
- *((pid_t *)gfid_copy) = getpid();
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
}
return SuperFastHash((char *)gfid_copy,
@@ -766,6 +764,13 @@ out:
}
void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
@@ -774,60 +779,51 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->buf)
- GF_FREE (sh->buf);
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
+
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
if (sh->inode)
inode_unref (sh->inode);
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
- }
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ GF_FREE (sh->child_errno);
afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
- if (sh->sources)
- GF_FREE (sh->sources);
+ GF_FREE (sh->sources);
- if (sh->success)
- GF_FREE (sh->success);
+ GF_FREE (sh->success);
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ GF_FREE (sh->locked_nodes);
if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
}
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ GF_FREE ((char *)sh->linkname);
- if (sh->success_children)
- GF_FREE (sh->success_children);
+ GF_FREE (sh->success_children);
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ GF_FREE (sh->fresh_children);
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ GF_FREE (sh->fresh_parent_dirs);
loc_wipe (&sh->parent_loc);
loc_wipe (&sh->lookup_loc);
- if (sh->checksum)
- GF_FREE (sh->checksum);
+ GF_FREE (sh->checksum);
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
+ GF_FREE (sh->write_needed);
if (sh->healing_fd)
fd_unref (sh->healing_fd);
}
@@ -836,7 +832,8 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
@@ -844,18 +841,15 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
afr_matrix_cleanup (local->transaction.txn_changelog,
priv->child_count);
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
-
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ GF_FREE (local->internal_lock.locked_nodes);
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+ afr_entry_lockee_cleanup (&local->internal_lock);
GF_FREE (local->transaction.pre_op);
GF_FREE (local->transaction.eager_lock);
@@ -865,6 +859,8 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
}
@@ -894,17 +890,13 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->dict)
dict_unref (local->dict);
- if (local->child_up)
- GF_FREE (local->child_up);
+ GF_FREE(local->replies);
- if (local->child_errno)
- GF_FREE (local->child_errno);
+ GF_FREE (local->child_up);
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+ GF_FREE (local->child_errno);
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ GF_FREE (local->fresh_children);
{ /* lookup */
if (local->cont.lookup.xattrs) {
@@ -922,27 +914,23 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
inode_unref (local->cont.lookup.inode);
}
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ GF_FREE (local->cont.lookup.postparents);
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ GF_FREE (local->cont.lookup.bufs);
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
+ GF_FREE (local->cont.lookup.success_children);
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
}
{ /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
+ GF_FREE (local->cont.getxattr.name);
}
{ /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
+ GF_FREE (local->cont.lk.locked_nodes);
}
{ /* create */
@@ -997,8 +985,7 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
}
{ /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
+ GF_FREE (local->cont.opendir.checksum);
}
{ /* readdirp */
@@ -1093,6 +1080,88 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
int
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
@@ -1137,13 +1206,18 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
ret = -1;
goto out;
}
+
gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
read_child);
if (!*xattr)
*xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
*buf = local->cont.lookup.bufs[read_child];
*postparent = local->cont.lookup.postparents[read_child];
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
if (IA_INVAL == local->cont.lookup.inode->ia_type) {
/* fix for RT #602 */
local->cont.lookup.inode->ia_type = buf->ia_type;
@@ -1181,41 +1255,97 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
local->cont.lookup.parent_entrylk += parent_entrylk;
}
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
static void
afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
dict_t *xattr)
{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
GF_ASSERT (local);
GF_ASSERT (this);
GF_ASSERT (xattr);
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ priv = this->private;
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
}
}
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
static void
afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
struct iatt *buf, struct iatt *lookup_buf)
{
if (PERMISSION_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"permissions differ for %s ", local->loc.path);
local->self_heal.do_metadata_self_heal = _gf_true;
}
@@ -1223,27 +1353,45 @@ afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
/* mismatching permissions */
local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"ownership differs for %s ", local->loc.path);
}
if (SIZE_DIFFERS (buf, lookup_buf)
&& IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"size differs for %s ", local->loc.path);
local->self_heal.do_data_self_heal = _gf_true;
}
if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
/* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s: gfid different on subvolume", local->loc.path);
}
}
static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
{
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1254,23 +1402,11 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
local->self_heal.do_entry_self_heal = _gf_true;
local->self_heal.do_gfid_self_heal = _gf_true;
local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
+ gf_log(this->name, GF_LOG_DEBUG,
"entries are missing in lookup of %s.",
local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
- }
-
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
}
-out:
return;
}
@@ -1280,6 +1416,8 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
GF_ASSERT (sh);
GF_ASSERT (priv);
+ if (sh->force_confirm_spb)
+ return _gf_true;
return (sh->do_gfid_self_heal
|| sh->do_missing_entry_self_heal
|| (afr_data_self_heal_enabled (priv->data_self_heal) &&
@@ -1381,7 +1519,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (background)
bg = "background";
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s %s self-heal triggered. path: %s, reason: %s", bg,
sh_type_str, local->loc.path, reason);
@@ -1452,7 +1590,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
child2 = &bufs[success_children[i-1]];
if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
"differs on subvolumes (%d, %d)", path,
success_children[i-1], success_children[i]);
conflicting = _gf_true;
@@ -1461,7 +1599,7 @@ afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
if (!gfid || uuid_is_null (child1->ia_gfid))
continue;
if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
" on subvolume %d", path, success_children[i]);
conflicting = _gf_true;
goto out;
@@ -1544,13 +1682,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
int32_t child1 = -1;
int32_t child2 = -1;
afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
priv = this->private;
sh = &local->self_heal;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ afr_detect_self_heal_by_lookup_status (local, this);
if (afr_lookup_gfid_missing_count (local, this))
local->self_heal.do_gfid_self_heal = _gf_true;
@@ -1577,9 +1713,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
afr_lookup_set_self_heal_params_by_xattr (local, this,
xattr[child1]);
}
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
}
int
@@ -1595,8 +1733,8 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
goto out;
} else {
@@ -1610,6 +1748,16 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
if (ret)
gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
"sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
}
out:
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
@@ -1684,7 +1832,8 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
afr_lookup_set_self_heal_params (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
goto out;
reason = "lookup detected pending operations";
@@ -1863,6 +2012,57 @@ out:
return;
}
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat on going
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol, At this stage second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@@ -1879,6 +2079,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
priv = this->private;
local = frame->local;
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
if (local->op_ret < 0)
goto unwind;
@@ -1936,25 +2142,20 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ESTALE > ENOENT > others
- *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
*/
-
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
{
- gf_boolean_t ret = _gf_true;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
-
- /* Nothing should overwrite ENOENT, except ESTALE/EIO*/
- else if ((old_errno == ENOENT) && (new_errno != ESTALE)
- && (new_errno != EIO))
- ret = _gf_false;
-
- return ret;
+ return new_errno;
}
int32_t
@@ -1973,8 +2174,9 @@ afr_resultant_errno_get (int32_t *children,
} else {
child = i;
}
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
}
return op_errno;
}
@@ -1986,8 +2188,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
if (op_errno == ENOENT)
local->enoent_count++;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
if (local->op_errno == ESTALE) {
local->op_ret = -1;
@@ -2171,6 +2373,7 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
struct iatt *iatts = NULL;
int32_t *success_children = NULL;
int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
GF_ASSERT (local);
local->cont.lookup.xattrs = GF_CALLOC (child_count,
@@ -2203,6 +2406,11 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.sources = sources;
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
ret = 0;
out:
return ret;
@@ -2220,7 +2428,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
int call_count = 0;
uint64_t ctx = 0;
int32_t op_errno = 0;
-
+ int allow_sh = 0;
priv = this->private;
AFR_LOCAL_ALLOC_OR_GOTO (local, out);
@@ -2237,6 +2445,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
+
ret = inode_ctx_get (local->loc.inode, this, &ctx);
if (ret == 0) {
/* lookup is a revalidate */
@@ -2285,6 +2500,11 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
+
ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
&gfid_req);
if (ret) {
@@ -2401,8 +2621,10 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
}
pthread_mutex_init (&fd_ctx->delay_lock, NULL);
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
if (ret)
@@ -2430,135 +2652,70 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
/* {{{ flush */
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
-
- return 0;
-}
-
-
-int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
if (local->success_count == 0) {
local->op_ret = op_ret;
}
local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- afr_flush_unwind (frame, this);
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
return 0;
}
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
- local = frame->local;
priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ local = frame->local;
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
local->fd, NULL);
-
if (!--call_count)
break;
+
}
}
return 0;
}
-
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
int
afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
+ call_stub_t *stub = NULL;
int ret = -1;
int op_errno = 0;
@@ -2568,47 +2725,27 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->op = GF_FOP_FLUSH;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
+ op_errno = ENOMEM;
goto out;
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
- ret = 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2622,8 +2759,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
ret = fd_ctx_get (fd, this, &ctx);
if (ret < 0)
@@ -2632,28 +2767,16 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
-
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ GF_FREE (fd_ctx->pre_op_done);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ GF_FREE (fd_ctx->opened_on);
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ GF_FREE (fd_ctx->locked_on);
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ GF_FREE (fd_ctx->lock_acquired);
pthread_mutex_destroy (&fd_ctx->delay_lock);
@@ -2693,6 +2816,16 @@ afr_release (xlator_t *this, fd_t *fd)
/* {{{ fsync */
int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
@@ -2701,6 +2834,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
@@ -2716,13 +2850,13 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
}
local->success_count++;
@@ -2735,10 +2869,32 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf,
- NULL);
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
@@ -2773,6 +2929,10 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->fd = fd_ref (fd);
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_fsync_cbk,
@@ -3615,8 +3775,7 @@ afr_forget (xlator_t *this, inode_t *inode)
goto out;
ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx->fresh_children);
GF_FREE (ctx);
out:
return 0;
@@ -3931,6 +4090,17 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
ret = 0;
out:
return ret;
@@ -3942,16 +4112,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -4010,6 +4170,21 @@ out:
}
int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
int child_up_count = 0;
@@ -4022,6 +4197,14 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
child_up_count = afr_up_children_count (local->child_up,
priv->child_count);
@@ -4043,14 +4226,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->fresh_children)
goto out;
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
- }
-
local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
priv->child_count,
gf_afr_mt_char);
@@ -4066,6 +4241,9 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
AFR_NUM_CHANGE_LOGS);
if (!local->transaction.txn_changelog)
goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
+
ret = 0;
out:
return ret;
@@ -4253,6 +4431,16 @@ afr_priv_destroy (afr_private_t *priv)
if (priv->shd.split_brain)
eh_destroy (priv->shd.split_brain);
+ for (i = 0; i < priv->child_count; i++)
+ {
+ if (priv->shd.statistics[i])
+ eh_destroy (priv->shd.statistics[i]);
+ }
+
+ GF_FREE (priv->shd.statistics);
+
+ GF_FREE (priv->shd.crawl_events);
+
GF_FREE (priv->last_event);
if (priv->pending_key) {
for (i = 0; i < priv->child_count; i++)
@@ -4279,3 +4467,125 @@ xlator_subvolume_count (xlator_t *this)
i++;
return i;
}
+
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+ return child_errno[child];
+}
+
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+ return !afr_is_errno_set (child_errno, child);
+}
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count)
+{
+ int midx = 0;
+ int idx = 0;
+ int i = 0;
+
+ midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (IA_ISDIR (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ else if (IA_ISREG (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ else
+ idx = -1;
+ for (i = 0; i < child_count; i++) {
+ if (is_pending (ctx, i)) {
+ pending[i][midx] = hton32 (1);
+ if (idx == -1)
+ continue;
+ pending[i][idx] = hton32 (1);
+ }
+ }
+}
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ inode_t *inode = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode)
+ return;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ ctx->open_fd_count = local->open_fd_count;
+ }
+ UNLOCK (&inode->lock);
+}
+
+int
+afr_initialise_statistics (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int i = 0;
+ int child_count = 0;
+ eh_t *stats_per_brick = NULL;
+ shd_crawl_event_t ***shd_crawl_events = NULL;
+ priv = this->private;
+
+ priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!priv->shd.statistics) {
+ ret = -1;
+ goto out;
+ }
+ child_count = priv->child_count;
+ for (i=0; i < child_count ; i++) {
+ stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ _destroy_crawl_event_data);
+ if (!stats_per_brick) {
+ ret = -1;
+ goto out;
+ }
+ priv->shd.statistics[i] = stats_per_brick;
+
+ }
+
+ shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+ *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+ priv->child_count,
+ gf_afr_mt_shd_crawl_event_t);
+
+ if (!priv->shd.crawl_events) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index ce91ffba7..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -253,7 +253,7 @@ unlock:
goto out;
if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
+ up_children_count > 1 && priv->entry_self_heal) {
/*
* This is the first opendir on this inode. We need
@@ -358,85 +358,6 @@ struct entry_name {
struct list_head list;
};
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
-{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
-{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
-
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
-
- return offset;
-}
-
-
static void
afr_forget_entries (fd_t *fd)
{
@@ -462,14 +383,36 @@ afr_forget_entries (fd_t *fd)
}
}
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
+{
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ list_del_init (&entry->list);
+ GF_FREE (entry);
+ }
+ }
+}
int32_t
afr_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
gf_dirent_t *entries, dict_t *xdata)
{
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ afr_local_t *local = NULL;
+
+ if (op_ret == -1)
+ goto out;
+
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
return 0;
}
@@ -479,113 +422,16 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
-
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0,
- local->cont.readdir.dict);
- return 0;
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that will make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset,
- local->cont.readdir.dict);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
+ afr_readdir_filter_trash_dir (entries, local->fd);
out:
AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
-
return 0;
}
@@ -593,15 +439,14 @@ int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -626,42 +471,32 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
read_child = afr_inode_get_read_ctx (this, fd->inode,
local->fresh_children);
ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
+
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
-
- fd_ctx->last_tried = call_child;
- }
-
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
@@ -675,10 +510,9 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
children[call_child]->fops->readdirp, fd,
size, offset, dict);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 0b804bef5..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -67,12 +67,195 @@ afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
ret = 0;
out:
- if (child_path)
- GF_FREE(child_path);
+ GF_FREE(child_path);
return ret;
}
+void
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
+
+ local->cont.dir_fop.inode = inode;
+
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
+ }
+
+ local->op_errno = op_errno;
+}
+
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
+}
+
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
+
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
+}
+
/* {{{ create */
int
@@ -80,7 +263,6 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -94,19 +276,14 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
AFR_STACK_UNWIND (create, main_frame,
local->op_ret, local->op_errno,
local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent,
- NULL);
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
}
return 0;
@@ -121,29 +298,20 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_t *xdata)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
+ if (op_ret > -1) {
ret = afr_fd_ctx_set (this, fd);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not set ctx on fd=%p", fd);
@@ -154,7 +322,6 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
ret = fd_ctx_get (fd, this, &ctx);
-
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"could not get fd ctx for fd=%p", fd);
@@ -168,23 +335,14 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.create.flags;
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
-
- local->cont.create.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
}
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
unlock:
@@ -192,16 +350,8 @@ unlock:
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.create.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -269,11 +419,12 @@ afr_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -305,6 +456,7 @@ afr_create (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
@@ -323,8 +475,22 @@ afr_create (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -347,7 +513,6 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -361,17 +526,12 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
AFR_STACK_UNWIND (mknod, main_frame,
local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -385,57 +545,23 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.mknod.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -498,11 +624,12 @@ int
afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -534,6 +661,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
local->umask = umask;
@@ -551,8 +679,22 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -576,7 +718,6 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -590,17 +731,12 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
AFR_STACK_UNWIND (mkdir, main_frame,
local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -614,57 +750,23 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.mkdir.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -723,16 +825,16 @@ afr_mkdir_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -769,6 +871,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
if (params)
local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_MKDIR;
local->transaction.fop = afr_mkdir_wind;
local->transaction.done = afr_mkdir_done;
local->transaction.unwind = afr_mkdir_unwind;
@@ -780,8 +883,22 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -806,7 +923,6 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -820,17 +936,12 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
AFR_STACK_UNWIND (link, main_frame,
local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -844,58 +955,23 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.link.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -924,7 +1000,8 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->link,
&local->loc,
@@ -956,11 +1033,12 @@ int
afr_link (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -995,6 +1073,7 @@ afr_link (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
+ local->op = GF_FOP_LINK;
local->transaction.fop = afr_link_wind;
local->transaction.done = afr_link_done;
local->transaction.unwind = afr_link_unwind;
@@ -1006,9 +1085,22 @@ afr_link (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
if (ret < 0) {
@@ -1031,7 +1123,6 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1045,17 +1136,12 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
AFR_STACK_UNWIND (symlink, main_frame,
local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -1069,57 +1155,23 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
child_index = (long) cookie;
LOCK (&frame->lock);
{
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.symlink.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1184,11 +1236,12 @@ int
afr_symlink (call_frame_t *frame, xlator_t *this,
const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1225,6 +1278,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
if (params)
local->xdata_req = dict_ref (params);
+ local->op = GF_FOP_SYMLINK;
local->transaction.fop = afr_symlink_wind;
local->transaction.done = afr_symlink_done;
local->transaction.unwind = afr_symlink_unwind;
@@ -1236,8 +1290,22 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1260,7 +1328,6 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
call_frame_t *main_frame = NULL;
afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
local = frame->local;
@@ -1274,19 +1341,13 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
UNLOCK (&frame->lock);
if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
AFR_STACK_UNWIND (rename, main_frame,
local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
NULL);
}
@@ -1313,38 +1374,22 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1405,11 +1450,13 @@ int
afr_rename (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1437,6 +1484,7 @@ afr_rename (call_frame_t *frame, xlator_t *this,
local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->op = GF_FOP_RENAME;
local->transaction.fop = afr_rename_wind;
local->transaction.done = afr_rename_done;
local->transaction.unwind = afr_rename_unwind;
@@ -1453,8 +1501,44 @@ afr_rename (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (oldloc->path);
local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1493,8 +1577,8 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (unlink, main_frame,
local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -1518,36 +1602,15 @@ afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (child_index == local->read_child_index) {
local->read_child_returned = _gf_true;
}
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1609,11 +1672,12 @@ int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, int xflag, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1641,6 +1705,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
if (xdata)
local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_UNLINK;
local->transaction.fop = afr_unlink_wind;
local->transaction.done = afr_unlink_done;
local->transaction.unwind = afr_unlink_unwind;
@@ -1652,8 +1717,22 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1693,8 +1772,8 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (rmdir, main_frame,
local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
NULL);
}
@@ -1719,36 +1798,22 @@ afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
-
if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
+
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
return 0;
}
@@ -1810,11 +1875,13 @@ int
afr_rmdir (call_frame_t *frame, xlator_t *this,
loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1840,6 +1907,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
local->cont.rmdir.flags = flags;
loc_copy (&local->loc, loc);
+ local->op = GF_FOP_RMDIR;
local->transaction.fop = afr_rmdir_wind;
local->transaction.done = afr_rmdir_done;
local->transaction.unwind = afr_rmdir_unwind;
@@ -1851,8 +1919,34 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 40e5abd3a..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -117,6 +117,8 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -232,6 +234,8 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -348,6 +352,8 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd->inode, out);
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -377,11 +383,8 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
+
STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->fstat,
@@ -470,6 +473,8 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -521,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -533,13 +538,14 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
@@ -628,6 +634,96 @@ afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
}
int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+int32_t
afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
@@ -761,12 +857,364 @@ afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
unwind:
if (unwind)
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
return 0;
}
int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+int32_t
afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
@@ -878,8 +1326,65 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
return ret;
}
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_min_stime (THIS, data, key, value);
+
+ return ret;
+}
+
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
+
+
static gf_boolean_t
-afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk)
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
{
gf_boolean_t is_spl = _gf_true;
@@ -889,14 +1394,31 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk)
goto out;
}
- if (!strcmp (name, GF_XATTR_PATHINFO_KEY))
- *cbk = afr_getxattr_pathinfo_cbk;
-
- else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
- strlen (GF_XATTR_CLRLK_CMD)))
- *cbk = afr_getxattr_clrlk_cbk;
- else
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else {
is_spl = _gf_false;
+ }
out:
return is_spl;
@@ -942,7 +1464,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
int32_t read_child = -1;
int ret = -1;
fop_getxattr_cbk_t cbk = NULL;
-
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -953,6 +1475,8 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -975,7 +1499,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
@@ -991,6 +1515,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
sub_volumes,
priv->child_count,
MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
priv->vol_uuid)) {
gf_log (this->name, GF_LOG_INFO,
@@ -1007,7 +1532,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
* if we are doing getxattr with pathinfo as the key then we
* collect information from all childs
*/
- if (afr_is_special_xattr (name, &cbk)) {
+ if (afr_is_special_xattr (name, &cbk, 0)) {
afr_getxattr_frm_all_children (this, frame, name,
loc, cbk);
return 0;
@@ -1025,10 +1550,11 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
if (*priv->vol_uuid) {
if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
local->marker.call_count = priv->child_count;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
for (i = 0, trav = this->children; trav ;
trav = trav->next, i++) {
@@ -1036,12 +1562,20 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
if (cluster_getmarkerattr (frame, this, loc,
name, local,
afr_getxattr_unwind,
sub_volumes,
priv->child_count,
MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
priv->vol_uuid)) {
gf_log (this->name, GF_LOG_INFO,
"%s: failed to get marker attr (%s)",
@@ -1061,7 +1595,8 @@ no_name:
goto out;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
@@ -1149,18 +1684,44 @@ afr_fgetxattr_unwind (call_frame_t *frame,
return 0;
}
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
+ }
+
+ return;
+}
+
int32_t
afr_fgetxattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
-
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1171,6 +1732,8 @@ afr_fgetxattr (call_frame_t *frame, xlator_t *this,
children = priv->children;
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (local, out);
frame->local = local;
@@ -1184,7 +1747,17 @@ afr_fgetxattr (call_frame_t *frame, xlator_t *this,
if (name)
local->cont.getxattr.name = gf_strdup (name);
- /* pathinfo gets handled only in getxattr() */
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
@@ -1192,7 +1765,8 @@ afr_fgetxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
op_ret = afr_get_call_child (this, local->child_up, read_child,
local->fresh_children,
&call_child,
@@ -1212,7 +1786,8 @@ afr_fgetxattr (call_frame_t *frame, xlator_t *this,
op_ret = 0;
out:
if (op_ret == -1) {
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
}
return 0;
}
@@ -1314,6 +1889,8 @@ afr_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
@@ -1343,11 +1920,8 @@ afr_readv (call_frame_t *frame, xlator_t *this,
local->cont.readv.offset = offset;
local->cont.readv.flags = flags;
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
+
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) call_child,
children[call_child],
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 02e36b13a..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -39,34 +39,133 @@
#include "afr-transaction.h"
#include "afr-self-heal-common.h"
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+ xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (*op_ret, *op_errno)) {
+ local->child_errno[child_index] = *op_errno;
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ if (*op_errno != EFBIG)
+ afr_transaction_fop_failed (frame, this,
+ child_index);
+ break;
+ default:
+ afr_transaction_fop_failed (frame, this, child_index);
+ break;
+ }
+ local->op_errno = *op_errno;
+ goto out;
+ }
+
+ if ((local->success_count == 0) || (read_child == child_index)) {
+ local->op_ret = *op_ret;
+ if (prebuf)
+ local->cont.inode_wfop.prebuf = *prebuf;
+ if (postbuf)
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+out:
+ return;
+}
+
/* {{{ writev */
-int
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+}
+
+void
afr_writev_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ local = frame->local;
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+}
+
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
local = frame->local;
LOCK (&frame->lock);
{
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
+ fop_frame = local->transaction.main_frame;
local->transaction.main_frame = NULL;
}
UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf,
- NULL);
+ return fop_frame;
+}
+
+int
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *fop_frame = NULL;
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
}
return 0;
}
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
int
afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -74,11 +173,17 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
int child_index = (long) cookie;
int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
local = frame->local;
+ priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -88,32 +193,81 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
}
- }
- local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
}
return 0;
}
@@ -125,6 +279,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int i = 0;
int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
@@ -138,6 +294,28 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
+ }
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -151,13 +329,16 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->cont.writev.offset,
local->cont.writev.flags,
local->cont.writev.iobref,
- NULL);
+ xdata);
if (!--call_count)
break;
}
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
@@ -197,7 +378,7 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
}
transaction_frame->local = local;
- frame->local = NULL;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local->op = GF_FOP_WRITE;
@@ -205,10 +386,17 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->transaction.fop = afr_writev_wind;
local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency gurantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
local->transaction.start = 0;
local->transaction.len = 0;
} else {
@@ -217,11 +405,15 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
@@ -230,153 +422,74 @@ out:
return 0;
}
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char *reason = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+ "fd: %p, inode: %p", fd,
+ fd ? fd->inode : NULL);
+ goto out;
}
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
-}
-
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
- GF_ASSERT (local->fop_call_continue);
-
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
-
- return paused_call;
-}
-
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
-
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
-
- return ret;
-}
+ ret = afr_local_init (local, this->private, &op_errno);
+ if (ret < 0)
+ goto out;
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
- local = frame->local;
sh = &local->self_heal;
- inode = local->fd->inode;
-
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ if (fd->inode->ia_type == IA_IFREG)
+ sh->do_data_self_heal = _gf_true;
+ else if (fd->inode->ia_type == IA_IFDIR)
+ sh->do_entry_self_heal = _gf_true;
reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
+ afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+ fd->inode->ia_type, reason, NULL, NULL);
+ return;
+out:
+ AFR_STACK_DESTROY (frame);
}
-int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
-{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- inode_t *inode = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
+{
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ size_t need_open_count = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
priv = this->private;
- GF_ASSERT (local->fd);
-
- inode = local->fd->inode;
- //gfid is not set in rebalance, that case needs to be handled.
- if (fd_is_anonymous (local->fd) ||
- !inode || uuid_is_null (inode->gfid)) {
- fop_continue = _gf_true;
- goto out;
- }
-
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
-
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
+ if (!afr_is_fd_fixable (fd))
goto out;
- }
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
goto out;
- }
- LOCK (&local->fd->lock);
+ LOCK (&fd->lock);
{
if (fd_ctx->up_count < priv->up_count) {
need_self_heal = _gf_true;
@@ -384,56 +497,34 @@ afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
fd_ctx->down_count = priv->down_count;
}
+ need_open = alloca (priv->child_count * sizeof (*need_open));
for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
+ need_open[i] = 0;
+ if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+ continue;
+
+ if (!priv->child_up[i])
+ continue;
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+
+ need_open[i] = 1;
+ need_open_count++;
}
}
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
+ UNLOCK (&fd->lock);
+ if (ret)
goto out;
- }
if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+ afr_trigger_open_fd_self_heal (fd, this);
if (!need_open_count)
goto out;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
+ afr_fix_open (this, fd, need_open_count, need_open);
out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+ return;
}
int
@@ -452,6 +543,11 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
QUORUM_CHECK(writev,out);
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
@@ -468,13 +564,15 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->cont.writev.iobref = iobref_ref (iobref);
local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_writev (frame, this);
ret = 0;
out:
@@ -508,8 +606,8 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
NULL);
}
@@ -523,14 +621,11 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
@@ -540,38 +635,22 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -599,6 +678,7 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -677,7 +757,11 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
local->transaction.start = offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -715,8 +799,8 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
NULL);
}
return 0;
@@ -729,14 +813,11 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int call_count = -1;
- int need_unwind = 0;
int read_child = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -746,38 +827,22 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
}
- local->op_errno = op_errno;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
local->transaction.resume (frame, this);
}
@@ -805,6 +870,7 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
}
local->call_count = call_count;
+ local->stable_write = _gf_true;
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
@@ -869,11 +935,15 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.ftruncate.offset;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
@@ -900,6 +970,10 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
QUORUM_CHECK(ftruncate,out);
AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
@@ -912,13 +986,10 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
local->cont.ftruncate.offset = offset;
local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
+
+ afr_do_ftruncate (frame, this);
ret = 0;
out:
@@ -954,8 +1025,8 @@ afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
NULL);
}
@@ -986,29 +1057,14 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
-
- local->success_count++;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1125,7 +1181,11 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1159,8 +1219,8 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
if (main_frame) {
AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
NULL);
}
@@ -1191,29 +1251,14 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
-
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
}
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1301,6 +1346,11 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
QUORUM_CHECK(fsetattr,out);
transaction_frame = copy_frame (frame);
@@ -1325,17 +1375,17 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_open_fd_fix (fd, this);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1381,28 +1431,23 @@ int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1479,17 +1524,16 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
- data_pair_t *trav = NULL;
int ret = -1;
int op_errno = EINVAL;
VALIDATE_OR_GOTO (this, out);
GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ op_errno, out);
GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
+ op_errno, out);
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this->private, out);
@@ -1523,7 +1567,11 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1568,28 +1616,24 @@ int
afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1668,23 +1712,24 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = EINVAL;
- data_pair_t *trav = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ op_errno, out);
GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
-
- if (ret)
- goto out;
+ op_errno, out);
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
QUORUM_CHECK(fsetxattr,out);
AFR_LOCAL_ALLOC_OR_GOTO (local, out);
@@ -1715,7 +1760,11 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1763,28 +1812,23 @@ int
afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -1905,7 +1949,11 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
ret = 0;
out:
@@ -1948,28 +1996,24 @@ int
afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
local = frame->local;
priv = this->private;
LOCK (&frame->lock);
{
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
-
- local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
@@ -2062,6 +2106,10 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
QUORUM_CHECK(fremovexattr, out);
@@ -2094,11 +2142,15 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this,
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
op_ret = 0;
out:
- if (op_ret == -1) {
+ if (op_ret < 0) {
if (transaction_frame)
AFR_STACK_DESTROY (transaction_frame);
AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
@@ -2106,3 +2158,704 @@ out:
return 0;
}
+
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fallocate,
+ local->fd,
+ local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_FALLOCATE;
+
+ local->transaction.fop = afr_fallocate_wind;
+ local->transaction.done = afr_fallocate_done;
+ local->transaction.unwind = afr_fallocate_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
+
+ /* fallocate can modify the file size */
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(fallocate,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_fallocate (frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ discard */
+
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->discard,
+ local->fd,
+ local->cont.discard.offset,
+ local->cont.discard.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_DISCARD;
+
+ local->transaction.fop = afr_discard_wind;
+ local->transaction.done = afr_discard_done;
+ local->transaction.unwind = afr_discard_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(discard, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_discard(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+
+/* {{{ zerofill */
+
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.zerofill.prebuf,
+ &local->cont.zerofill.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind) {
+ local->transaction.unwind (frame, this);
+ }
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->zerofill,
+ local->fd,
+ local->cont.zerofill.offset,
+ local->cont.zerofill.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ local->op = GF_FOP_ZEROFILL;
+
+ local->transaction.fop = afr_zerofill_wind;
+ local->transaction.done = afr_zerofill_done;
+ local->transaction.unwind = afr_zerofill_unwind;
+
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this,
+ AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(zerofill, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ goto out;
+ }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
+
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_zerofill(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index ed11079fd..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -68,4 +68,15 @@ int32_t
afr_fremovexattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, const char *name, dict_t *xdata);
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 5e61be4d4..060d78f35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -55,7 +55,33 @@
} while (0);
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
@@ -125,16 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
}
return call_count;
@@ -332,10 +351,13 @@ static void
afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
+ int32_t cookie)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
char lock[256];
char lockee[256];
@@ -343,28 +365,40 @@ afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
local = frame->local;
int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
static void
afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
afr_lock_call_type_t lock_call_type,
afr_lock_op_type_t lk_op_type, const char *basename,
- int op_ret, int op_errno, int32_t child_index)
+ int op_ret, int op_errno, int32_t cookie)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
char lock[256];
char lockee[256];
@@ -373,20 +407,30 @@ afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
local = frame->local;
int_lock = &local->internal_lock;
+ priv = this->private;
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
+
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
afr_print_verdict (op_ret, op_errno, verdict);
gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
lock_call_type_str,
lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
verdict,
lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ (unsigned long long) int_lock->lock_number,
+ cookie);
}
@@ -439,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local)
return ret;
}
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
static int
initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
{
@@ -456,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
}
return 0;
@@ -469,19 +559,23 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
return 0;
}
@@ -503,6 +597,18 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
}
int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+ int call_count = 0;
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
+
+ return call_count;
+}
+
+int
afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
{
@@ -550,7 +656,9 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
local = frame->local;
int_lock = &local->internal_lock;
@@ -559,14 +667,18 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
AFR_UNLOCK_OP, NULL, op_ret,
op_errno, child_index);
+ priv = this->private;
+
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on %d "
- "unlock by %s", local->loc.path, child_index,
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
lkowner_utoa (&frame->root->lk_owner));
}
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
@@ -580,6 +692,7 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
@@ -595,12 +708,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
flock.l_type = F_UNLCK;
full_flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -616,8 +731,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
fd_ctx = afr_fd_ctx_get (local->fd, this);
for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
continue;
if (local->fd) {
@@ -658,7 +772,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLK, flock_use, NULL);
if (!--call_count)
@@ -673,7 +787,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLK, &flock, NULL);
if (!--call_count)
@@ -689,13 +803,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
local = frame->local;
+ int_lock = &local->internal_lock;
AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -703,6 +826,7 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, child_index, strerror (op_errno));
}
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
return 0;
@@ -711,24 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static int
afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
+ call_count = afr_lockee_locked_nodes_count (int_lock);
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
int_lock->lk_call_count = call_count;
if (!call_count){
@@ -738,18 +860,22 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
goto out;
}
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
if (!--call_count)
@@ -766,13 +892,20 @@ static int32_t
afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+ priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
LOCK (&frame->lock);
{
if (op_ret == -1) {
@@ -788,6 +921,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -796,10 +931,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
}
- afr_lock_blocking (frame, this, child_index + 1);
+ afr_lock_blocking (frame, this, cky + 1);
}
return 0;
@@ -819,79 +961,6 @@ afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_copy_locked_nodes (frame, this);
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
-out:
- return 0;
-}
-
-static int32_t
afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -907,6 +976,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -917,18 +987,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
break;
}
@@ -936,25 +1004,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
}
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
struct gf_flock flock = {0,};
uint64_t ctx = 0;
int ret = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -973,42 +1083,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
}
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
- afr_copy_locked_nodes (frame, this);
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- afr_unlock(frame, this);
+ afr_copy_locked_nodes (frame, this);
- return 0;
+ afr_unlock(frame, this);
+ return 0;
+ }
}
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
@@ -1021,6 +1115,11 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
return 0;
}
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
@@ -1035,7 +1134,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLKW, &flock, NULL);
} else {
@@ -1048,50 +1147,29 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLKW, &flock, NULL);
}
break;
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
- break;
- }
-
case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
if (local->fd) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
AFR_TRACE_ENTRYLK_IN (frame, this,
@@ -1100,12 +1178,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
child_index);
STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
+ (void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
}
@@ -1134,11 +1212,12 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
break;
case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
up_count = afr_up_children_count (local->child_up,
priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
- case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
initialize_entrylk_variables (frame, this);
break;
}
@@ -1154,35 +1233,48 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
local = frame->local;
int_lock = &local->internal_lock;
AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
op_errno, (long) cookie);
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
/* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
- }
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
- LOCK (&frame->lock);
- {
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
@@ -1212,42 +1304,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
-{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ copies = priv->child_count;
initialize_entrylk_variables (frame, this);
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
@@ -1260,11 +1336,11 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
return -1;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1277,46 +1353,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
this->name, local->fd,
- basename,
+ int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
NULL);
+ if (!--call_count)
+ break;
}
}
} else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
(void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
NULL);
if (!--call_count)
break;
-
}
}
}
@@ -1329,6 +1411,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
@@ -1337,58 +1420,57 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
op_errno, (long) cookie);
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (local->transaction.eager_lock &&
- local->transaction.eager_lock[child_index] && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
LOCK (&frame->lock);
{
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
call_count = --int_lock->lk_call_count;
}
UNLOCK (&frame->lock);
+
if (call_count == 0) {
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
/* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1412,6 +1494,7 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
@@ -1427,11 +1510,13 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
- full_flock.l_type = int_lock->lk_flock.l_type;
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
@@ -1447,11 +1532,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
local->op_errno = EINVAL;
int_lock->lock_op_errno = EINVAL;
+ afr_unlock (frame, this);
ret = -1;
goto out;
}
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
@@ -1466,11 +1551,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
+ if (!local->child_up[i])
continue;
flock_use = &flock;
- if (!priv->eager_lock) {
+ if (!local->transaction.eager_lock_on) {
goto wind;
}
@@ -1506,7 +1591,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLK, flock_use, NULL);
if (!--call_count)
@@ -1528,7 +1613,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLK, &flock, NULL);
if (!--call_count)
@@ -1539,201 +1624,6 @@ out:
return ret;
}
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
-
- return count;
-
-}
-
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
-
- return count;
-
-}
-
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
-
- }
- }
-
-out:
- return 0;
-
-}
-
-
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
- return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
-
-}
-
int32_t
afr_unlock (call_frame_t *frame, xlator_t *this)
{
@@ -1745,10 +1635,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
if (is_afr_lock_transaction (local))
afr_unlock_inodelk (frame, this);
else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
+ afr_unlock_entrylk (frame, this);
+
} else {
if (is_afr_lock_selfheal (local))
afr_unlock_inodelk (frame, this);
@@ -2249,29 +2137,38 @@ out:
return ret;
}
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
src_local = src->local;
src_lock = &src_local->internal_lock;
- if (src_lock->inode_locked_nodes) {
- memcpy (dst_lock->inode_locked_nodes,
- src_lock->inode_locked_nodes,
- sizeof (*dst_lock->inode_locked_nodes) * child_count);
- memset (src_lock->inode_locked_nodes, 0,
- sizeof (*src_lock->inode_locked_nodes) * child_count);
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
}
dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
- dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count;
- src_lock->inodelk_lock_count = 0;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 7e684801f..73594f265 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -41,6 +41,10 @@ enum gf_afr_mem_types_ {
gf_afr_mt_shd_event_t,
gf_afr_mt_time_t,
gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 35090d80e..643a5d692 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -249,188 +249,126 @@ out:
return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
- }
-}
-
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
-{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
- }
-}
-
int
afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
- fd_t *local_fd = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
priv = this->private;
local = frame->local;
- fop_paused = local->fop_paused;
if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
"successfully on subvolume %s", local->loc.path,
priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
}
- local_fd = fd_ref (local->fd);
- call_count = afr_frame_return (frame);
- //Note: Do not access any thing using the frame outside call_count 0
-
- //Note: No frame locking needed for this block of code
- fd_ctx = afr_fd_ctx_get (local_fd, this);
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local_fd);
+ "failed to get fd context, %p", local->fd);
goto out;
}
- LOCK (&local_fd->lock);
+ LOCK (&local->fd->lock);
{
if (op_ret >= 0) {
fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
} else {
fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
}
- UNLOCK (&local_fd->lock);
+ UNLOCK (&local->fd->lock);
out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
-done:
- fd_unref (local_fd);
return 0;
}
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- ia_type_t ia_type = IA_INVAL;
- int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local = frame->local;
priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (open_frame->local, out);
- open_local = open_frame->local;
- ret = afr_local_init (open_local, priv, &op_errno);
- if (ret < 0)
- goto out;
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
+
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
}
- open_local->call_count = need_open_count;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
+
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
need_open_count);
- ia_type = open_local->fd->inode->ia_type;
- GF_ASSERT (ia_type != IA_INVAL);
for (i = 0; i < priv->child_count; i++) {
if (!need_open[i])
continue;
- if (IA_IFDIR == ia_type) {
+
+ if (IA_IFDIR == fd->inode->ia_type) {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for dir %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void*) (long) i,
priv->children[i],
priv->children[i]->fops->opendir,
- &open_local->loc, open_local->fd,
+ &local->loc, local->fd,
NULL);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"opening fd for file %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, NULL);
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
}
@@ -438,8 +376,7 @@ afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
ret = 0;
out:
if (op_errno)
- ret = -op_errno;
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ ret = -1; //For handling ALLOC_OR_GOTO
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 18edca8c1..83846f152 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -63,8 +63,7 @@ sh_private_cleanup (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
+ GF_FREE (sh_priv);
}
static int
@@ -101,10 +100,10 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
}
sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
GF_ASSERT (!last_loop_frame);
//loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"self-heal aborting on %s",
local->loc.path);
@@ -112,10 +111,10 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
} else {
GF_ASSERT (last_loop_frame);
if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_INFO, "full self-heal "
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
"completed on %s",local->loc.path);
} else {
- gf_log (this->name, GF_LOG_INFO,
+ gf_log (this->name, GF_LOG_DEBUG,
"diff self-heal on %s: completed. "
"(%d blocks of %d were different (%.2f%%))",