summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBala.FA <barumuga@redhat.com>2013-08-09 17:21:56 +0530
committerVijay Bellur <vbellur@redhat.com>2013-08-14 13:05:50 -0700
commitfc82ae5ddf3b1821095de4188fead9a25bc24191 (patch)
treecd6e0f5c8ca83ec39be3871419b91c66d1c0cc04
parent1d1daa234eac97554103da16a7d6090bc25e5294 (diff)
log: add comments to rsyslog gluster configuration file
Comments are added to rsyslog gluster configuration files to give clear meanings and example email alerting. Change-Id: I23577dc3b39bca89696c46ab6252dc5673d0803f BUG: 928648 Signed-off-by: Bala.FA <barumuga@redhat.com> Reviewed-on: http://review.gluster.org/5621 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--extras/gluster-rsyslog-5.8.conf38
-rw-r--r--extras/gluster-rsyslog-7.2.conf34
2 files changed, 71 insertions, 1 deletions
diff --git a/extras/gluster-rsyslog-5.8.conf b/extras/gluster-rsyslog-5.8.conf
index da6232bc075..2519999bcac 100644
--- a/extras/gluster-rsyslog-5.8.conf
+++ b/extras/gluster-rsyslog-5.8.conf
@@ -1,15 +1,51 @@
##### gluster.conf #####
+
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
$RepeatedMsgReduction on
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named '$msgid'
+#
$ModLoad mmcount
-$mmcountKey gf_code
+$mmcountKey gf_code # start counting value of gf_code
$template Glusterfsd_dynLogFile,"/var/log/glusterfs/bricks/%app-name%.log"
$template Gluster_dynLogFile,"/var/log/glusterfs/%app-name%.log"
$template GLFS_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+#
+## Pass logs to mmcount if app-name is 'gluster'
+#
if $app-name contains 'gluster' then :mmcount:
+
if $app-name contains 'glusterfsd' then ?Glusterfsd_dynLogFile;GLFS_Template
if $app-name contains 'gluster' and not ( $app-name contains 'glusterfsd' ) then ?Gluster_dynLogFile;GLFS_Template
+
+#
+## Sample configuration to send a email alert for every 50th mmcount
+#
+#$ModLoad ommail
+#$ActionMailSMTPServer smtp.example.com
+#$ActionMailFrom rsyslog@example.com
+#$ActionMailTo glusteradmin@example.com
+#$template mailSubject,"50th message of gf_code=9999 on %hostname%"
+#$template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+#$ActionMailSubject mailSubject
+#$ActionExecOnlyOnceEveryInterval 30
+#if $app-name == 'glusterfsd' and $msgid != 0 and $msgid % 50 == 0 \
+#then :ommail:;RSYSLOG_SyslogProtocol23Format
+#
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
if $app-name contains 'gluster' then ~
diff --git a/extras/gluster-rsyslog-7.2.conf b/extras/gluster-rsyslog-7.2.conf
index 744acbc9ef0..8b2841543f4 100644
--- a/extras/gluster-rsyslog-7.2.conf
+++ b/extras/gluster-rsyslog-7.2.conf
@@ -1,9 +1,28 @@
##### gluster.conf #####
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
$RepeatedMsgReduction on
$ModLoad mmjsonparse
*.* :mmjsonparse:
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named 'mmcount'
+##
+## More info at http://www.rsyslog.com/doc/mmcount.html
+#
+#module(load="mmcount")
+#action(type="mmcount" appname="glusterd" key="!gf_code") # count each value of gf_code of appname glusterd
+#action(type="mmcount" appname="glusterfsd" key="!gf_code") # count each value of gf_code of appname glusterfsd
+#action(type="mmcount" appname="glusterfs" key="!gf_code") # count each value of gf_code of appname glusterfs
+
template (name="Glusterfsd_dynLogFile" type="string" string="/var/log/glusterfs/bricks/%app-name%.log")
template (name="Gluster_dynLogFile" type="string" string="/var/log/glusterfs/%app-name%.log")
@@ -40,3 +59,18 @@ if $app-name contains 'gluster' then {
Template="GLFS_template")
stop
}
+
+#
+## send email for every 50th mmcount
+#$ModLoad ommail
+#if $app-name == 'glusterfsd' and $!mmcount <> 0 and $!mmcount % 50 == 0 then {
+# $ActionMailSMTPServer smtp.example.com
+# $ActionMailFrom rsyslog@example.com
+# $ActionMailTo glusteradmin@example.com
+# $template mailSubject,"50th message of gf_code=9999 on %hostname%"
+# $template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+# $ActionMailSubject mailSubject
+# $ActionExecOnlyOnceEveryInterval 30
+# :ommail:;RSYSLOG_SyslogProtocol23Format
+#}
+#
width='100%'> -rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h142
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2120
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3596
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c1225
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h51
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1787
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h65
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c2555
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h53
-rw-r--r--xlators/cluster/afr/src/afr.c3162
-rw-r--r--xlators/cluster/afr/src/afr.h1483
-rw-r--r--xlators/cluster/afr/src/pump.c2641
-rw-r--r--xlators/cluster/afr/src/pump.h78
-rw-r--r--xlators/cluster/dht/src/Makefile.am32
-rw-r--r--xlators/cluster/dht/src/dht-common.c6476
-rw-r--r--xlators/cluster/dht/src/dht-common.h860
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c503
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c147
-rw-r--r--xlators/cluster/dht/src/dht-helper.c1150
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1139
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1013
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1041
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c426
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h35
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1815
-rw-r--r--xlators/cluster/dht/src/dht-rename.c1161
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c1237
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c458
-rw-r--r--xlators/cluster/dht/src/nufa.c998
-rw-r--r--xlators/cluster/dht/src/switch.c904
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c35
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h26
-rw-r--r--xlators/cluster/ha/src/ha.c312
-rw-r--r--xlators/cluster/ha/src/ha.h32
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c29
-rw-r--r--xlators/cluster/map/src/map-mem-types.h24
-rw-r--r--xlators/cluster/map/src/map.c215
-rw-r--r--xlators/cluster/map/src/map.h25
-rw-r--r--xlators/cluster/stripe/src/Makefile.am14
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h31
-rw-r--r--xlators/cluster/stripe/src/stripe.c6255
-rw-r--r--xlators/cluster/stripe/src/stripe.h216
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1227
-rw-r--r--xlators/cluster/unify/src/unify.c4539
-rw-r--r--xlators/cluster/unify/src/unify.h145
-rw-r--r--xlators/debug/error-gen/src/Makefile.am9
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2879
-rw-r--r--xlators/debug/error-gen/src/error-gen.h48
-rw-r--r--xlators/debug/io-stats/src/Makefile.am9
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h24
-rw-r--r--xlators/debug/io-stats/src/io-stats.c3327
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c5486
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c95
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am5
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am9
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c193
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c1365
-rw-r--r--xlators/features/locks/src/common.h165
-rw-r--r--xlators/features/locks/src/entrylk.c939
-rw-r--r--xlators/features/locks/src/inodelk.c929
-rw-r--r--xlators/features/locks/src/locks-mem-types.h29
-rw-r--r--xlators/features/locks/src/locks.h161
-rw-r--r--xlators/features/locks/src/posix.c3016
-rw-r--r--xlators/features/locks/src/reservelk.c443
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)0
-rw-r--r--xlators/features/mac-compat/src/Makefile.am14
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c237
-rw-r--r--xlators/features/marker/Makefile.am3
-rw-r--r--xlators/features/marker/src/Makefile.am17
-rw-r--r--xlators/features/marker/src/marker-common.c69
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h25
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c414
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h76
-rw-r--r--xlators/features/marker/src/marker-quota.c2520
-rw-r--r--xlators/features/marker/src/marker-quota.h130
-rw-r--r--xlators/features/marker/src/marker.c2862
-rw-r--r--xlators/features/marker/src/marker.h138
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h22
-rw-r--r--xlators/features/path-convertor/src/path.c184
-rw-r--r--xlators/features/protect/Makefile.am3
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/Makefile.am3
-rw-r--r--xlators/features/quiesce/src/Makefile.am15
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c2610
-rw-r--r--xlators/features/quiesce/src/quiesce.h51
-rw-r--r--xlators/features/quota/src/Makefile.am12
-rw-r--r--xlators/features/quota/src/quota-mem-types.h27
-rw-r--r--xlators/features/quota/src/quota.c3915
-rw-r--r--xlators/features/quota/src/quota.h151
-rw-r--r--xlators/features/read-only/Makefile.am3
-rw-r--r--xlators/features/read-only/src/Makefile.am22
-rw-r--r--xlators/features/read-only/src/read-only-common.c239
-rw-r--r--xlators/features/read-only/src/read-only-common.h115
-rw-r--r--xlators/features/read-only/src/read-only.c77
-rw-r--r--xlators/features/read-only/src/worm.c89
-rw-r--r--xlators/features/trash/src/Makefile.am9
-rw-r--r--xlators/features/trash/src/trash-mem-types.h22
-rw-r--r--xlators/features/trash/src/trash.c2005
-rw-r--r--xlators/features/trash/src/trash.h79
-rw-r--r--xlators/lib/src/libxlator.c470
-rw-r--r--xlators/lib/src/libxlator.h153
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h25
-rw-r--r--xlators/meta/src/meta.c88
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c43
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am50
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1953
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c4236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c4237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1366
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c693
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c6243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h299
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c492
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c749
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2024
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1974
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c1109
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c3564
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h164
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c8894
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h634
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c4065
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h176
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2225
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1597
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h952
-rw-r--r--xlators/mount/fuse/src/Makefile.am39
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c5312
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h537
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c605
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h28
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c724
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in499
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in28
-rw-r--r--xlators/nfs/Makefile.am3
-rw-r--r--xlators/nfs/server/Makefile.am3
-rw-r--r--xlators/nfs/server/src/Makefile.am24
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c2759
-rw-r--r--xlators/nfs/server/src/mount3.h131
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c465
-rw-r--r--xlators/nfs/server/src/nfs-common.h81
-rw-r--r--xlators/nfs/server/src/nfs-fops.c1661
-rw-r--r--xlators/nfs/server/src/nfs-fops.h248
-rw-r--r--xlators/nfs/server/src/nfs-generics.c345
-rw-r--r--xlators/nfs/server/src/nfs-generics.h168
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c608
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h76
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h53
-rw-r--r--xlators/nfs/server/src/nfs.c1825
-rw-r--r--xlators/nfs/server/src/nfs.h135
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c188
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h103
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c3881
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h337
-rw-r--r--xlators/nfs/server/src/nfs3.c5630
-rw-r--r--xlators/nfs/server/src/nfs3.h285
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am10
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2588
-rw-r--r--xlators/performance/io-cache/src/io-cache.h345
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c297
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h29
-rw-r--r--xlators/performance/io-cache/src/page.c1333
-rw-r--r--xlators/performance/io-threads/src/Makefile.am9
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2529
-rw-r--r--xlators/performance/io-threads/src/io-threads.h178
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h22
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am9
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h27
-rw-r--r--xlators/performance/quick-read/src/quick-read.c2687
-rw-r--r--xlators/performance/quick-read/src/quick-read.h81
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/read-ahead/src/page.c696
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h26
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1590
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h148
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c1754
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h77
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c76
-rw-r--r--xlators/performance/write-behind/src/Makefile.am9
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h26
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3521
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/Makefile.am4
-rw-r--r--xlators/protocol/auth/Makefile.am1
-rw-r--r--xlators/protocol/auth/addr/Makefile.am1
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c229
-rw-r--r--xlators/protocol/auth/login/Makefile.am1
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am12
-rw-r--r--xlators/protocol/auth/login/src/login.c128
-rw-r--r--xlators/protocol/client/Makefile.am2
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c56
-rw-r--r--xlators/protocol/client/src/client-handshake.c1960
-rw-r--r--xlators/protocol/client/src/client-helpers.c349
-rw-r--r--xlators/protocol/client/src/client-lk.c573
-rw-r--r--xlators/protocol/client/src/client-mem-types.h25
-rw-r--r--xlators/protocol/client/src/client-protocol.c6451
-rw-r--r--xlators/protocol/client/src/client-protocol.h171
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6203
-rw-r--r--xlators/protocol/client/src/client.c2867
-rw-r--r--xlators/protocol/client/src/client.h257
-rw-r--r--xlators/protocol/client/src/saved-frames.c191
-rw-r--r--xlators/protocol/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/server/Makefile.am2
-rw-r--r--xlators/protocol/server/src/Makefile.am28
-rw-r--r--xlators/protocol/server/src/authenticate.c253
-rw-r--r--xlators/protocol/server/src/authenticate.h51
-rw-r--r--xlators/protocol/server/src/server-dentry.c414
-rw-r--r--xlators/protocol/server/src/server-handshake.c781
-rw-r--r--xlators/protocol/server/src/server-helpers.c1831
-rw-r--r--xlators/protocol/server/src/server-helpers.h101
-rw-r--r--xlators/protocol/server/src/server-mem-types.h30
-rw-r--r--xlators/protocol/server/src/server-protocol.c7789
-rw-r--r--xlators/protocol/server/src/server-protocol.h155
-rw-r--r--xlators/protocol/server/src/server-resolve.c598
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c1214
-rw-r--r--xlators/protocol/server/src/server.h196
-rw-r--r--xlators/storage/Makefile.am8
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1460
-rw-r--r--xlators/storage/bdb/src/bdb.c3585
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am18
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1391
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h27
-rw-r--r--xlators/storage/posix/src/posix.c5503
-rw-r--r--xlators/storage/posix/src/posix.h158
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c180
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h26
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2183
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h30
420 files changed, 240467 insertions, 70250 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 2abb52194..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 7f32d7f29..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2009 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -77,9 +67,6 @@ struct xlator_fops fops = {
.writev = python_writev
};
-struct xlator_mops mops = {
-};
-
static PyObject *
AnonModule_FromFile (const char* fname)
{
@@ -151,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -169,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -191,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -220,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index a6ddb3564..0990822a7 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = unify stripe afr dht ha map
+SUBDIRS = stripe afr dht
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index df284d12c..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,20 +1,37 @@
-xlator_LTLIBRARIES = afr.la
+xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_la_LDFLAGS = -module -avoidversion
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c
+afr_la_LDFLAGS = -module -avoid-version
+afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h
+pump_la_LDFLAGS = -module -avoid-version
+pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
+ rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
- ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so \ No newline at end of file
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
new file mode 100644
index 000000000..af01f2ef2
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -0,0 +1,4591 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+#include "inode.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
+#include "pump.h"
+
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
+#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#define AFR_STATISTICS_HISTORY_SIZE 50
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ dst[i] = src[i];
+}
+
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ path, priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
+}
+
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc, void **gfid_req)
+{
+ int ret = -ENOMEM;
+
+ GF_ASSERT (gfid_req);
+
+ *gfid_req = NULL;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
+
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to get the gfid from dict", loc->path);
+ *gfid_req = NULL;
+ } else {
+ if (loc->parent != NULL)
+ dict_del (local->xattr_req, "gfid-req");
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+{
+ inode_t *inode = NULL;
+
+ inode = loc->inode;
+ if (inode && !uuid_is_null (inode->gfid))
+ uuid_copy (dst, inode->gfid);
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (dst, loc->gfid);
+ else if (new && !uuid_is_null (new))
+ uuid_copy (dst, new);
+}
+
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno)
+{
+ int i = 0;
+ int errno_count = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (child_errno[child] == op_errno)
+ errno_count++;
+ }
+ return errno_count;
+}
+
+int32_t
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+ int ret = 0;
+ uuid_t *pgfid = NULL;
+
+ GF_ASSERT (gfid);
+
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ if (ret && pgfid)
+ GF_FREE (pgfid);
+
+ return ret;
+}
+
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
+
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
+ }
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
+out:
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ }
+ UNLOCK (&inode->lock);
+ return ctx;
+}
+
+void
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
+ read_child = (int32_t)(ctx->masks &
+ AFR_ICTX_READ_CHILD_MASK);
+ params->u.read_ctx.read_child = read_child;
+ if (!fresh_children)
+ goto unlock;
+ for (i = 0; i < priv->child_count; i++)
+ fresh_children[i] = ctx->fresh_children[i];
+ break;
+ case AFR_INODE_GET_OPENDIR_DONE:
+ params->u.value = _gf_false;
+ if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
+ params->u.value = _gf_true;
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
+}
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.value;
+}
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.read_ctx.read_child;
+}
+
+void
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children)
+ ctx->fresh_children[i] = fresh_children[i];
+ else
+ ctx->fresh_children[i] = -1;
+ }
+}
+
+void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
+ int32_t child_count)
+{
+ int i = 0;
+ int32_t read_child = -1;
+
+ GF_ASSERT (stale_children);
+ for (i = 0; i < child_count; i++) {
+ if (stale_children[i] == -1)
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+ read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
+ if (!afr_is_child_present (ctx->fresh_children, child_count,
+ read_child))
+ afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+}
+
+void
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
+ read_child = params->u.read_ctx.read_child;
+ fresh_children = params->u.read_ctx.children;
+ afr_inode_ctx_set_read_ctx (ctx, read_child,
+ fresh_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_RM_STALE_CHILDREN:
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
+ afr_inode_ctx_set_opendir_done (ctx);
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
+}
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+
+ GF_ASSERT (stale_children);
+
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
+ params.u.read_ctx.children = stale_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+gf_boolean_t
+afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+{
+ gf_boolean_t source_xattrs = _gf_false;
+
+ GF_ASSERT (child < child_count);
+
+ if ((child >= 0) && (child < child_count) &&
+ sources[child]) {
+ source_xattrs = _gf_true;
+ }
+ return source_xattrs;
+}
+
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ int i = 0;
+
+ GF_ASSERT (child < child_count);
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (child == success_children[i]) {
+ success_child = _gf_true;
+ break;
+ }
+ }
+ return success_child;
+}
+
+gf_boolean_t
+afr_is_read_child (int32_t *success_children, int32_t *sources,
+ int32_t child_count, int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ gf_boolean_t source = _gf_false;
+
+ if (child < 0) {
+ return _gf_false;
+ }
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (child_count > 0);
+
+ success_child = afr_is_child_present (success_children, child_count,
+ child);
+ if (!success_child)
+ goto out;
+ if (NULL == sources) {
+ source = _gf_true;
+ goto out;
+ }
+ source = afr_is_source_child (sources, child_count, child);
+out:
+ return (success_child && source);
+}
+
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients won't converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
+
+/* If sources is NULL the xattrs are assumed to be of source for all
+ * success_children.
+ */
+int
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
+{
+ int32_t read_child = -1;
+ int i = 0;
+
+ GF_ASSERT (success_children);
+
+ read_child = config_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = prev_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ read_child = success_children[i];
+ if (read_child < 0)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+ }
+ read_child = -1;
+
+out:
+ return read_child;
+}
+
+/* This function should be used when all the success_children are sources
+ */
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid)
+{
+ int read_child = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ read_child = afr_select_read_child_from_policy (fresh_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ NULL,
+ priv->hash_mode, gfid);
+ if (read_child >= 0)
+ afr_inode_set_read_ctx (this, inode, read_child,
+ fresh_children);
+}
+
+/* afr_next_call_child ()
+ * This is a common function used by all the read-type fops
+ * This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child)
+{
+ int next_index = 0;
+ int32_t next_call_child = -1;
+
+ GF_ASSERT (last_index);
+
+ next_index = *last_index;
+retry:
+ next_index++;
+ if ((next_index >= child_count) ||
+ (fresh_children[next_index] == -1))
+ goto out;
+ if ((fresh_children[next_index] == read_child) ||
+ (!child_up[fresh_children[next_index]]))
+ goto retry;
+ *last_index = next_index;
+ next_call_child = fresh_children[next_index];
+out:
+ return next_call_child;
+}
+
+ /* This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+ GF_ASSERT (call_child);
+ GF_ASSERT (last_index);
+ GF_ASSERT (fresh_children);
+
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ priv = this->private;
+ *call_child = -1;
+ *last_index = -1;
+
+ if (child_up[read_child]) {
+ *call_child = read_child;
+ } else {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ if (child_up[fresh_children[i]]) {
+ *call_child = fresh_children[i];
+ ret = 0;
+ break;
+ }
+ }
+
+ if (*call_child == -1) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ *last_index = i;
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
+ "last_index: %d", ret, *call_child, *last_index);
+ return ret;
+}
+
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+{
+ unsigned int i = 0;
+
+ if (!xattr)
+ goto out;
+ for (i = 0; i < child_count; i++) {
+ if (xattr[i]) {
+ dict_unref (xattr[i]);
+ xattr[i] = NULL;
+ }
+ }
+out:
+ return;
+}
+
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
+afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
+
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
+
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
+
+ if (sh->inode)
+ inode_unref (sh->inode);
+
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
+
+ GF_FREE (sh->child_errno);
+
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+
+ GF_FREE (sh->sources);
+
+ GF_FREE (sh->success);
+
+ GF_FREE (sh->locked_nodes);
+
+ if (sh->healing_fd) {
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+ }
+
+ GF_FREE ((char *)sh->linkname);
+
+ GF_FREE (sh->success_children);
+
+ GF_FREE (sh->fresh_children);
+
+ GF_FREE (sh->fresh_parent_dirs);
+
+ loc_wipe (&sh->parent_loc);
+ loc_wipe (&sh->lookup_loc);
+
+ GF_FREE (sh->checksum);
+
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
+}
+
+
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
+
+ GF_FREE (local->internal_lock.locked_nodes);
+
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
+
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+
+ afr_entry_lockee_cleanup (&local->internal_lock);
+
+ GF_FREE (local->transaction.pre_op);
+ GF_FREE (local->transaction.eager_lock);
+
+ GF_FREE (local->transaction.basename);
+ GF_FREE (local->transaction.new_basename);
+
+ loc_wipe (&local->transaction.parent_loc);
+ loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
+}
+
+
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+
+ if (!local)
+ return;
+
+ afr_local_sh_cleanup (local, this);
+
+ afr_local_transaction_cleanup (local, this);
+
+ priv = this->private;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
+
+ GF_FREE (local->child_up);
+
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->fresh_children);
+
+ { /* lookup */
+ if (local->cont.lookup.xattrs) {
+ afr_reset_xattr (local->cont.lookup.xattrs,
+ priv->child_count);
+ GF_FREE (local->cont.lookup.xattrs);
+ local->cont.lookup.xattrs = NULL;
+ }
+
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ }
+
+ if (local->cont.lookup.inode) {
+ inode_unref (local->cont.lookup.inode);
+ }
+
+ GF_FREE (local->cont.lookup.postparents);
+
+ GF_FREE (local->cont.lookup.bufs);
+
+ GF_FREE (local->cont.lookup.success_children);
+
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
+ }
+
+ { /* getxattr */
+ GF_FREE (local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE (local->cont.lk.locked_nodes);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref (local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref (local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref (local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref (local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref (local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE (local->cont.writev.vector);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref (local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE (local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE (local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
+ }
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+}
+
+
+int
+afr_frame_return (call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ return call_count;
+}
+
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (elems[i])
+ ret++;
+ return ret;
+}
+
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+{
+ return afr_set_elem_count_get (child_up, child_count);
+}
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
+}
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
+
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int32_t ret = 0;
+
+ GF_ASSERT (loc);
+ GF_ASSERT (this);
+ GF_ASSERT (loc->inode);
+
+ ret = inode_ctx_get (loc->inode, this, &ctx);
+ if (0 == ret)
+ return _gf_false;
+ return _gf_true;
+}
+
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
+}
+
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
+int
+afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
+{
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
+ dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int ret = 0;
+ int i = 0;
+
+ GF_ASSERT (local);
+
+ buf = &local->cont.lookup.buf;
+ postparent = &local->cont.lookup.postparent;
+ xattr = &local->cont.lookup.xattr;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
+ read_child);
+ if (!*xattr)
+ *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
+ *buf = local->cont.lookup.bufs[read_child];
+ *postparent = local->cont.lookup.postparents[read_child];
+
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
+ if (IA_INVAL == local->cont.lookup.inode->ia_type) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->ia_type = buf->ia_type;
+ }
+out:
+ return ret;
+}
+
+static void
+afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
+ int child_index, dict_t *xattr)
+{
+ uint32_t inodelk_count = 0;
+ uint32_t entrylk_count = 0;
+ int ret = -1;
+ uint32_t parent_entrylk = 0;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+ GF_ASSERT (child_index >= 0);
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
+ &inodelk_count);
+ if (ret == 0)
+ local->inodelk_count += inodelk_count;
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
+ &entrylk_count);
+ if (ret == 0)
+ local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
+}
+
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
+static void
+afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
+
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+ }
+}
+
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
+static void
+afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
+ struct iatt *buf, struct iatt *lookup_buf)
+{
+ if (PERMISSION_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "permissions differ for %s ", local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ownership differs for %s ", local->loc.path);
+ }
+
+ if (SIZE_DIFFERS (buf, lookup_buf)
+ && IA_ISREG (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "size differs for %s ", local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+
+ if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
+ /* mismatching gfid */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: gfid different on subvolume", local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
+{
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+
+ if ((local->success_count > 0) && (local->enoent_count > 0)) {
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
+ local->self_heal.do_entry_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entries are missing in lookup of %s.",
+ local->loc.path);
+ }
+
+ return;
+}
+
+gf_boolean_t
+afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (priv);
+
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ return (sh->do_gfid_self_heal
+ || sh->do_missing_entry_self_heal
+ || (afr_data_self_heal_enabled (priv->data_self_heal) &&
+ sh->do_data_self_heal)
+ || (priv->metadata_self_heal && sh->do_metadata_self_heal)
+ || (priv->entry_self_heal && sh->do_entry_self_heal));
+}
+
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type)
+{
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ if (IA_ISDIR (ia_type)) {
+ type = AFR_ENTRY_TRANSACTION;
+ } else if (IA_ISREG (ia_type)) {
+ type = AFR_DATA_TRANSACTION;
+ }
+ return type;
+}
+
+int
+afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
+ int32_t *read_child)
+{
+ ia_type_t ia_type = IA_INVAL;
+ int32_t source = -1;
+ int ret = -1;
+ dict_t **xattrs = NULL;
+ int32_t *success_children = NULL;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (local->success_count > 0);
+
+ success_children = local->cont.lookup.success_children;
+ /*We can take the success_children[0] only because we already
+ *handle the conflicting children other wise, we could select the
+ *read_child based on wrong file type
+ */
+ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
+ type = afr_transaction_type_get (ia_type);
+ xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
+ source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
+ type, *gfid);
+ if (source < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
+ "for %s", local->loc.path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
+ source, local->loc.path);
+ *read_child = source;
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline gf_boolean_t
+afr_is_transaction_running (afr_local_t *local)
+{
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
+ return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
+}
+
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
+{
+ afr_local_t *local = NULL;
+ char sh_type_str[256] = {0,};
+ char *bg = "";
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ local = frame->local;
+ local->self_heal.background = background;
+ local->self_heal.type = ia_type;
+ local->self_heal.unwind = unwind;
+ local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+
+ afr_self_heal_type_str_get (&local->self_heal,
+ sh_type_str,
+ sizeof (sh_type_str));
+
+ if (background)
+ bg = "background";
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s %s self-heal triggered. path: %s, reason: %s", bg,
+ sh_type_str, local->loc.path, reason);
+
+ afr_self_heal (frame, this, inode);
+}
+
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path)
+{
+ unsigned int gfid_miss_count = 0;
+ int i = 0;
+ struct iatt *child1 = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if (uuid_is_null (child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
+ " on subvolume %d", path, success_children[i]);
+ gfid_miss_count++;
+ }
+ }
+
+ return gfid_miss_count;
+}
+
+static int
+afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+{
+ int32_t *success_children = NULL;
+ afr_private_t *priv = NULL;
+ struct iatt *bufs = NULL;
+ int miss_count = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ miss_count = afr_gfid_missing_count (this->name, success_children,
+ bufs, priv->child_count,
+ local->loc.path);
+ return miss_count;
+}
+
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name)
+{
+ gf_boolean_t conflicting = _gf_false;
+ int i = 0;
+ struct iatt *child1 = NULL;
+ struct iatt *child2 = NULL;
+ uuid_t *gfid = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
+ gfid = &child1->ia_gfid;
+
+ if (i == 0)
+ continue;
+
+ child2 = &bufs[success_children[i-1]];
+ if (FILETYPE_DIFFERS (child1, child2)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
+ "differs on subvolumes (%d, %d)", path,
+ success_children[i-1], success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ if (!gfid || uuid_is_null (child1->ia_gfid))
+ continue;
+ if (uuid_compare (*gfid, child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
+ " on subvolume %d", path, success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return conflicting;
+}
+
+/* afr_update_gfid_from_iatts: This function should be called only if the
+ * iatts are not conflicting.
+ */
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children, unsigned int child_count)
+{
+ uuid_t *gfid = NULL;
+ int i = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
+ gfid = &bufs[child].ia_gfid;
+ } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
+ if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
+ }
+ if (gfid && (!uuid_is_null (*gfid)))
+ uuid_copy (uuid, *gfid);
+out:
+ return;
+}
+
+static gf_boolean_t
+afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t conflict = _gf_false;
+
+ priv = this->private;
+ conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name);
+ return conflict;
+}
+
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal)
+{
+ return !strcmp (data_self_heal, "open");
+}
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal)
+{
+ gf_boolean_t enabled = _gf_false;
+
+ if (gf_string2boolean (data_self_heal, &enabled) == -1) {
+ enabled = !strcmp (data_self_heal, "open");
+ GF_ASSERT (enabled);
+ }
+
+ return enabled;
+}
+
+static void
+afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ struct iatt *bufs = NULL;
+ dict_t **xattr = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child1 = -1;
+ int32_t child2 = -1;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ sh = &local->self_heal;
+
+ afr_detect_self_heal_by_lookup_status (local, this);
+
+ if (afr_lookup_gfid_missing_count (local, this))
+ local->self_heal.do_gfid_self_heal = _gf_true;
+
+ if (_gf_true == afr_lookup_conflicting_entries (local, this))
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ else
+ afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
+ local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count);
+
+ bufs = local->cont.lookup.bufs;
+ for (i = 1; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i-1];
+ child2 = local->cont.lookup.success_children[i];
+ afr_detect_self_heal_by_iatt (local, this,
+ &bufs[child1], &bufs[child2]);
+ }
+
+ xattr = local->cont.lookup.xattrs;
+ for (i = 0; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i];
+ afr_lookup_set_self_heal_params_by_xattr (local, this,
+ xattr[child1]);
+ }
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
+}
+
+int
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
+{
+ afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
+
+ goto out;
+ } else {
+ local->op_ret = 0;
+ }
+
+ afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
+out:
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode, &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+//TODO: At the moment only lookup needs this, so not doing any checks, in the
+// future we will have to do fop specific operations
+void
+afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ struct iatt *lookup_bufs = NULL;
+ struct iatt *lookup_parentbufs = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+ local = sh->orig_frame->local;
+ lookup_bufs = local->cont.lookup.bufs;
+ lookup_parentbufs = local->cont.lookup.postparents;
+ priv = this->private;
+
+ memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
+ memcpy (lookup_parentbufs, sh->parentbufs,
+ priv->child_count * sizeof (*sh->parentbufs));
+
+ afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ local->cont.lookup.xattr = NULL;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i])
+ local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
+ }
+
+ afr_reset_children (local->cont.lookup.success_children,
+ priv->child_count);
+ afr_children_copy (local->cont.lookup.success_children,
+ sh->fresh_children, priv->child_count);
+}
+
+static void
+afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t *sh_launched)
+{
+ unsigned int up_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *reason = NULL;
+
+ GF_ASSERT (sh_launched);
+ *sh_launched = _gf_false;
+ priv = this->private;
+ local = frame->local;
+
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
+ if (up_count == 1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Only 1 child up - do not attempt to detect self heal");
+ goto out;
+ }
+
+ afr_lookup_set_self_heal_params (local, this);
+ if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
+ goto out;
+
+ reason = "lookup detected pending operations";
+ afr_launch_self_heal (frame, this, local->cont.lookup.inode,
+ _gf_true, local->cont.lookup.buf.ia_type,
+ reason, afr_post_gfid_sh_success,
+ afr_self_heal_lookup_unwind);
+ *sh_launched = _gf_true;
+ }
+out:
+ return;
+}
+
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *fresh_children, unsigned int child_count)
+{
+ unsigned int i = 0;
+ unsigned int j = 0;
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (sources);
+ GF_ASSERT (fresh_children);
+
+ afr_reset_children (fresh_children, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ success_children[i])) {
+ fresh_children[j] = success_children[i];
+ j++;
+ }
+ }
+}
+
+static int
+afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+{
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (read_child >= 0);
+
+ priv = this->private;
+ afr_get_fresh_children (local->cont.lookup.success_children,
+ local->cont.lookup.sources,
+ local->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
+ local->fresh_children);
+
+ return 0;
+}
+
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict)
+{
+ int32_t read_child = -1;
+ int32_t ret = -1;
+ afr_local_t *local = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
+
+ local = frame->local;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
+
+ if (local->loc.parent == NULL)
+ fail_conflict = _gf_true;
+
+ if (afr_lookup_conflicting_entries (local, this)) {
+ if (fail_conflict == _gf_false)
+ ret = 0;
+ goto out;
+ }
+
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
+ goto out;
+
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
+
+ ret = 0;
+out:
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ }
+ return ret;
+}
+
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
+ }
+ return lsubvol;
+}
+
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
+ }
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
+
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
+out:
+ return;
+}
+
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat on going
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol, At this stage second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
+{
+ int unwind = 1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t sh_launched = _gf_false;
+ gf_boolean_t fail_conflict = _gf_false;
+ int gfid_miss_count = 0;
+ int enotconn_count = 0;
+ int up_children_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0)
+ goto unwind;
+
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
+ gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ enotconn_count = priv->child_count - up_children_count;
+ if ((gfid_miss_count == local->success_count) &&
+ (enotconn_count > 0)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
+ "LOOKUP on a file without gfid is not allowed when "
+ "some of the children are down", local->loc.path);
+ goto unwind;
+ }
+
+ if ((gfid_miss_count == local->success_count) &&
+ uuid_is_null (local->cont.lookup.gfid_req)) {
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
+ local->loc.path);
+ goto unwind;
+ }
+
+ if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
+ fail_conflict = _gf_true;
+ ret = afr_lookup_done_success_action (frame, this, fail_conflict);
+ if (ret)
+ goto unwind;
+ uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+
+ afr_lookup_perform_self_heal (frame, this, &sh_launched);
+ if (sh_launched) {
+ unwind = 0;
+ goto unwind;
+ }
+
+ unwind:
+ if (unwind) {
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+ }
+}
+
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
+ */
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
+{
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
+}
+
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+ int32_t op_errno = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
+ }
+ return op_errno;
+}
+
+static void
+afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+{
+ GF_ASSERT (local);
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
+
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+}
+
+static void
+afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
+ inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+ GF_ASSERT (inode);
+
+ if (!__is_root_gfid (inode->gfid))
+ goto out;
+ if (!afr_is_fresh_lookup (&local->loc, this))
+ goto out;
+ priv = this->private;
+ if ((priv->first_lookup)) {
+ gf_log (this->name, GF_LOG_INFO, "added root inode");
+ priv->root_inode = inode_ref (inode);
+ priv->first_lookup = 0;
+ }
+out:
+ return;
+}
+
+static void
+afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
+ struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (child_index >= 0);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparents[child_index] = *postparent;
+ local->cont.lookup.bufs[child_index] = *buf;
+}
+
+static void
+afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
+ inode_t *inode, struct iatt *buf)
+{
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.buf = *buf;
+ afr_set_root_inode_on_first_lookup (local, this, inode);
+}
+
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
+static void
+afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_private_t *priv = this->private;
+
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE) {
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ }
+ afr_lookup_handle_first_success (local, this, inode, buf);
+ }
+ afr_lookup_update_lk_counts (local, this,
+ child_index, xattr);
+
+ afr_lookup_cache_args (local, child_index, xattr,
+ buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
+ local->cont.lookup.success_children[local->success_count] = child_index;
+ local->success_count++;
+}
+
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == -1) {
+ afr_lookup_handle_error (local, op_ret, op_errno);
+ goto unlock;
+ }
+ afr_lookup_handle_success (local, this, child_index, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_done (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+{
+ int ret = -ENOMEM;
+ struct iatt *iatts = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
+
+ GF_ASSERT (local);
+ local->cont.lookup.xattrs = GF_CALLOC (child_count,
+ sizeof (*local->cont.lookup.xattr),
+ gf_afr_mt_dict_t);
+ if (NULL == local->cont.lookup.xattrs)
+ goto out;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.postparents = iatts;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.bufs = iatts;
+
+ success_children = afr_children_create (child_count);
+ if (NULL == success_children)
+ goto out;
+ local->cont.lookup.success_children = success_children;
+
+ local->fresh_children = afr_children_create (child_count);
+ if (NULL == local->fresh_children)
+ goto out;
+
+ sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ local->cont.lookup.sources = sources;
+
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ void *gfid_req = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+ uint64_t ctx = 0;
+ int32_t op_errno = 0;
+ int allow_sh = 0;
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ local->op_ret = -1;
+
+ frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
+
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret == 0) {
+ /* lookup is a revalidate */
+
+ local->read_child_index = afr_inode_get_read_ctx (this,
+ local->loc.inode,
+ NULL);
+ } else {
+ LOCK (&priv->read_child_lock);
+ {
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
+ }
+ UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
+ }
+
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ if (NULL == local->child_up) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = afr_lookup_cont_init (local, priv->child_count);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ call_count = local->call_count;
+ if (local->call_count == 0) {
+ ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* By default assume ENOTCONN. On success it will be set to 0. */
+ local->op_errno = ENOTCONN;
+
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
+
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
+ &gfid_req);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+ afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
+ &local->loc);
+ local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* {{{ open */
+
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ priv = this->private;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret == 0)
+ goto out;
+
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
+out:
+ return ret;
+}
+
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+
+ LOCK (&fd->lock);
+ {
+ ret = __afr_fd_ctx_set (this, fd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ret;
+}
+
+/* {{{ flush */
+
+int
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
+
+ return 0;
+}
+
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ local->fd, NULL);
+ if (!--call_count)
+ break;
+
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
+
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* }}} */
+
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+{
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ if (fd_ctx) {
+ GF_FREE (fd_ctx->pre_op_done);
+
+ GF_FREE (fd_ctx->opened_on);
+
+ GF_FREE (fd_ctx->locked_on);
+
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
+
+ GF_FREE (fd_ctx->lock_acquired);
+
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
+
+ GF_FREE (fd_ctx);
+ }
+
+out:
+ return 0;
+}
+
+
+int
+afr_release (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_locked_fd_t *tmp = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ afr_cleanup_fd_ctx (this, fd);
+
+ list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
+ list) {
+
+ if (locked_fd->fd == fd) {
+ list_del_init (&locked_fd->list);
+ GF_FREE (locked_fd);
+ }
+
+ }
+
+ return 0;
+}
+
+
+/* {{{ fsync */
+
+int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (local->success_count == 0) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ local->fd = fd_ref (fd);
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsync_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsync,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fsync */
+
+int32_t
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fsyncdir_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ xattrop */
+
+int32_t
+afr_xattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
+ local->cont.xattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_xattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ loc, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fxattrop */
+
+int32_t
+afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
+ local->cont.fxattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fxattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ fd, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+
+int32_t
+afr_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_inodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_finodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ volume, fd, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_entrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+
+int32_t
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fentrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+afr_statfs_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == 0) {
+ local->op_ret = op_ret;
+
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
+ local->cont.statfs.buf = *statvfs;
+ } else {
+ local->cont.statfs.buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ }
+ }
+
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ int child_count = 0;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_statfs_cbk,
+ priv->children[i],
+ priv->children[i]->fops->statfs,
+ loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ lock, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+ priv->child_count);
+
+ if (call_count == 0) {
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND (frame, afr_lk_unlock_cbk,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ local->fd, F_SETLK,
+ &local->cont.lk.user_flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
+/* int ret = 0; */
+
+
+ local = frame->local;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_lk_unlock (frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk,
+ local->fd, local->cont.lk.cmd,
+ &local->cont.lk.user_flock, xdata);
+ } else if (local->op_ret == -1) {
+ /* all nodes have gone down */
+
+ AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
+ &local->cont.lk.ret_flock, NULL);
+ } else {
+ /* locking has succeeded on all nodes that are up */
+
+ /* temporarily
+ ret = afr_mark_locked_nodes (this, local->fd,
+ local->cont.lk.locked_nodes);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked nodes info in fdctx");
+
+ ret = afr_save_locked_fd (this, local->fd);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked fd");
+
+ */
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ }
+
+ return 0;
+}
+
+
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ fd, cmd, flock, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_get (inode, this, &ctx_addr);
+
+ if (!ctx_addr)
+ goto out;
+
+ ctx = (afr_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+out:
+ return 0;
+}
+
+int
+afr_priv_dump (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf (key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf (key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+ gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+ gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+
+ return 0;
+}
+
+
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((xlator_t *) child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
+
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ had_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ /* parent xlators dont need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+ "from invalid subvolume");
+ goto out;
+ }
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ LOCK (&priv->lock);
+ {
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
+ priv->child_up[idx] = 1;
+
+ call_psh = 1;
+ up_child = idx;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_CONNECTING:
+ LOCK (&priv->lock);
+ {
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
+ break;
+
+ default:
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+
+ LOCK (&priv->lock);
+ {
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
+
+out:
+ return ret;
+}
+
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+
+ for (i = 0; i < child_count; i++) {
+ if (child_up[i]) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int ret = -1;
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy (local->child_up, priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->child_errno = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_errno),
+ gf_afr_mt_int32_t);
+ if (!local->child_errno) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type)
+{
+ int ret = -ENOMEM;
+
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+
+ lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
+
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
+ lk->transaction_lk_type = lk_type;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+ int i = 0;
+
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE (matrix[i]);
+ }
+
+ GF_FREE (matrix);
+out:
+ return;
+}
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
+ gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup (matrix, m);
+ return NULL;
+}
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+{
+ int child_up_count = 0;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
+ AFR_TRANSACTION_LK);
+ if (ret < 0)
+ goto out;
+
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ child_up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (priv->optimistic_change_log && child_up_count == priv->child_count)
+ local->optimistic_change_log = 1;
+
+ local->first_up_child = afr_first_up_child (local->child_up,
+ priv->child_count);
+
+ local->transaction.eager_lock =
+ GF_CALLOC (sizeof (*local->transaction.eager_lock),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+
+ if (!local->transaction.eager_lock)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->transaction.txn_changelog)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count)
+{
+ unsigned int i = 0;
+ for (i = 0; i < child_count; i++)
+ fresh_children[i] = -1;
+}
+
+int32_t*
+afr_children_create (int32_t child_count)
+{
+ int32_t *children = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_count > 0);
+
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
+ goto out;
+ for (i = 0; i < child_count; i++)
+ children[i] = -1;
+out:
+ return children;
+}
+
+void
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
+{
+ gf_boolean_t child_found = _gf_false;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ child_found = _gf_true;
+ break;
+ }
+ }
+
+ if (!child_found) {
+ GF_ASSERT (i < child_count);
+ children[i] = child;
+ }
+}
+
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ if (i != (child_count - 1))
+ memmove (children + i, children + i + 1,
+ sizeof (*children)*(child_count - i - 1));
+ children[child_count - 1] = -1;
+ break;
+ }
+ }
+}
+
+int
+afr_get_children_count (int32_t *children, unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ count++;
+ }
+ return count;
+}
+
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags)
+{
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ GF_ASSERT (fd && fd->inode);
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ fd_ctx->opened_on[child] = AFR_FD_OPENED;
+ if (!IA_ISDIR (fd->inode->ia_type)) {
+ fd_ctx->flags = flags;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+ unsigned int quorum = 0;
+
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
+
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
+
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
+ }
+ }
+
+out:
+ return _gf_false;
+}
+
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+ int i = 0;
+
+ if (!priv)
+ goto out;
+ inode_unref (priv->root_inode);
+ GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
+ for (i = 0; i < priv->child_count; i++)
+ {
+ if (priv->shd.statistics[i])
+ eh_destroy (priv->shd.statistics[i]);
+ }
+
+ GF_FREE (priv->shd.statistics);
+
+ GF_FREE (priv->shd.crawl_events);
+
+ GF_FREE (priv->last_event);
+ if (priv->pending_key) {
+ for (i = 0; i < priv->child_count; i++)
+ GF_FREE (priv->pending_key[i]);
+ }
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->children);
+ GF_FREE (priv->child_up);
+ LOCK_DESTROY (&priv->lock);
+ LOCK_DESTROY (&priv->read_child_lock);
+ pthread_mutex_destroy (&priv->mutex);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+xlator_subvolume_count (xlator_t *this)
+{
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
+}
+
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+ return child_errno[child];
+}
+
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+ return !afr_is_errno_set (child_errno, child);
+}
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count)
+{
+ int midx = 0;
+ int idx = 0;
+ int i = 0;
+
+ midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (IA_ISDIR (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ else if (IA_ISREG (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ else
+ idx = -1;
+ for (i = 0; i < child_count; i++) {
+ if (is_pending (ctx, i)) {
+ pending[i][midx] = hton32 (1);
+ if (idx == -1)
+ continue;
+ pending[i][idx] = hton32 (1);
+ }
+ }
+}
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ inode_t *inode = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode)
+ return;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ ctx->open_fd_count = local->open_fd_count;
+ }
+ UNLOCK (&inode->lock);
+}
+
+int
+afr_initialise_statistics (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int i = 0;
+ int child_count = 0;
+ eh_t *stats_per_brick = NULL;
+ shd_crawl_event_t ***shd_crawl_events = NULL;
+ priv = this->private;
+
+ priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!priv->shd.statistics) {
+ ret = -1;
+ goto out;
+ }
+ child_count = priv->child_count;
+ for (i=0; i < child_count ; i++) {
+ stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ _destroy_crawl_event_data);
+ if (!stats_per_brick) {
+ ret = -1;
+ goto out;
+ }
+ priv->shd.statistics[i] = stats_per_brick;
+
+ }
+
+ shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+ *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+ priv->child_count,
+ gf_afr_mt_shd_crawl_event_t);
+
+ if (!priv->shd.crawl_events) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index e259f624b..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,6 +15,7 @@
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
+#include <string.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -42,102 +34,317 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "checksum.h"
#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+int
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ afr_set_opendir_done (this, local->fd->inode);
+
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+
+ return 0;
+}
+
+
+gf_boolean_t
+__checksums_differ (uint32_t *checksum, int child_count,
+ unsigned char *child_up)
+{
+ int ret = _gf_false;
+ int i = 0;
+ uint32_t cksum = 0;
+ gf_boolean_t activate_check = _gf_false;
+
+ for (i = 0; i < child_count; i++) {
+ if (!child_up[i])
+ continue;
+ if (_gf_false == activate_check) {
+ cksum = checksum[i];
+ activate_check = _gf_true;
+ continue;
+ }
+
+ if (cksum != checksum[i]) {
+ ret = _gf_true;
+ break;
+ }
+
+ cksum = checksum[i];
+ }
+
+ return ret;
+}
+
+
+int32_t
+afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
+ int child_index = 0;
+ uint32_t entry_cksum = 0;
+ int call_count = 0;
+ off_t last_offset = 0;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to do opendir on %s",
+ local->loc.path, priv->children[child_index]->name);
+ local->op_ret = -1;
+ local->op_ret = op_errno;
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: no entries found in %s",
+ local->loc.path, priv->children[child_index]->name);
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
+ strlen (entry->d_name));
+ local->cont.opendir.checksum[child_index] ^= entry_cksum;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ }
+
+ /* read more entries */
+
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readdir,
+ local->fd, 131072, last_offset, NULL);
+
+ return 0;
+
+out:
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (__checksums_differ (local->cont.opendir.checksum,
+ priv->child_count,
+ local->child_up)) {
+
+ sh->do_entry_self_heal = _gf_true;
+ sh->forced_merge = _gf_true;
+
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
+ } else {
+ afr_set_opendir_done (this, inode);
+
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_examine_dir (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.opendir.checksum),
+ gf_afr_mt_int32_t);
+
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->readdir,
+ local->fd, 131072, 0, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t up_children_count = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t child_index = 0;
+
+ priv = this->private;
+ local = frame->local;
+ child_index = (long) cookie;
- int call_count = -1;
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
- LOCK (&frame->lock);
- {
- local = frame->local;
+ local->op_errno = op_errno;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- if (op_ret == 0)
- local->op_ret = 0;
+ call_count = afr_frame_return (frame);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (call_count == 0) {
+ if (local->op_ret != 0)
+ goto out;
- call_count = afr_frame_return (frame);
+ if (!afr_is_opendir_done (this, local->fd->inode) &&
+ up_children_count > 1 && priv->entry_self_heal) {
- if (call_count == 0) {
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ /*
+ * This is the first opendir on this inode. We need
+ * to check if the directory's entries are the same
+ * on all subvolumes. This is needed in addition
+ * to regular entry self-heal because the readdir
+ * call is sent only to the first subvolume, and
+ * thus files that exist only there will never be healed
+ * otherwise (assuming changelog shows no anomalies).
+ */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "reading contents of directory %s looking for mismatch",
+ local->loc.path);
+
+ afr_examine_dir (frame, this);
+
+ } else {
+ /* do the unwind */
+ goto out;
+ }
+ }
+
+ return 0;
+
+out:
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
- return 0;
+ return 0;
}
-int32_t
+int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int child_count = 0;
+ int i = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t op_errno = 0;
- int child_count = 0;
- int i = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int ret = -1;
- int call_count = -1;
+ priv = this->private;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ child_count = priv->child_count;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- priv = this->private;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- child_count = priv->child_count;
+ loc_copy (&local->loc, loc);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->fd = fd_ref (fd);
- frame->local = local;
- local->fd = fd_ref (fd);
+ call_count = local->call_count;
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
- if (!--call_count)
- break;
- }
- }
+ if (!--call_count)
+ break;
+ }
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
/**
* Common algorithm for directory read calls:
- *
+ *
* - Try the fop on the first child that is up
* - if we have failed due to ENOTCONN:
* try the next child
@@ -145,252 +352,194 @@ out:
* Applicable to: readdir
*/
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- gf_dirent_t * entry = NULL;
- int child_index = -1;
+struct entry_name {
+ char *name;
+ struct list_head list;
+};
- priv = this->private;
- children = priv->children;
+static void
+afr_forget_entries (fd_t *fd)
+{
+ struct entry_name *entry = NULL;
+ struct entry_name *tmp = NULL;
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ ret = fd_ctx_get (fd, THIS, &ctx);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "could not get fd ctx for fd=%p", fd);
+ return;
+ }
- local = frame->local;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- child_index = (long) cookie;
+ list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
+ GF_FREE (entry->name);
+ list_del (&entry->list);
+ GF_FREE (entry);
+ }
+}
- if (op_ret != -1) {
- list_for_each_entry (entry, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
+{
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ list_del_init (&entry->list);
+ GF_FREE (entry);
}
- }
+ }
+}
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entries);
+int32_t
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ if (op_ret == -1)
+ goto out;
+
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- return 0;
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
}
int32_t
afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- ino_t inum = 0;
+ afr_local_t *local = NULL;
- gf_dirent_t * entry = NULL;
+ if (op_ret == -1)
+ goto out;
- int child_index = -1;
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
+
+out:
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
+
+int32_t
+afr_do_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
priv = this->private;
children = priv->children;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
- child_index = (long) cookie;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (op_ret != -1) {
- list_for_each_entry (entry, &entries->list, list) {
- inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
- entry->d_ino = inum;
- inum = afr_itransform (entry->d_stat.st_ino,
- priv->child_count, child_index);
- entry->d_stat.st_ino = inum;
- }
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
}
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entries);
-
- return 0;
-}
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
-int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
-{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
- local->cont.readdir.offset = offset;
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdir, fd,
- size, offset);
+ size, offset, dict);
else
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdirp, fd,
- size, offset);
+ size, offset, dict);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
return 0;
}
-int32_t
-afr_getdents_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dir_entry_t *entry, int32_t count)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- if (op_ret == -1) {
- last_tried = local->cont.getdents.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
-
- this_try = ++local->cont.getdents.last_tried;
- unwind = 0;
-
- STACK_WIND (frame, afr_getdents_cbk,
- children[this_try],
- children[this_try]->fops->getdents,
- local->fd, local->cont.getdents.size,
- local->cont.getdents.offset, local->cont.getdents.flag);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- }
-
- return 0;
-}
-
int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag)
+afr_releasedir (xlator_t *this, fd_t *fd)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up.");
- goto out;
- }
+ afr_forget_entries (fd);
+ afr_cleanup_fd_ctx (this, fd);
- local->cont.getdents.last_tried = call_child;
-
- local->fd = fd_ref (fd);
-
- local->cont.getdents.size = size;
- local->cont.getdents.offset = offset;
- local->cont.getdents.flag = flag;
-
- frame->local = local;
-
- STACK_WIND (frame, afr_getdents_cbk,
- children[call_child], children[call_child]->fops->getdents,
- fd, size, offset, flag);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
-
- return 0;
+ return 0;
}
-
-
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 8749c35ff..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,29 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
-afr_closedir (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
-
-int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 5cd05b138..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,282 +38,470 @@
#include "afr.h"
#include "afr-transaction.h"
-
-void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
- char *tmp = NULL;
+ int ret = -1;
+ char *child_path = NULL;
- if (!child->parent) {
- loc_copy (parent, child);
- return;
- }
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
- tmp = strdup (child->path);
- parent->path = strdup (dirname (tmp));
- FREE (tmp);
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ ret = 0;
+out:
+ GF_FREE(child_path);
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+ return ret;
}
-/* {{{ create */
+void
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
+
+ local->cont.dir_fop.inode = inode;
+
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
+ }
+
+ local->op_errno = op_errno;
+}
int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- call_frame_t *main_frame = NULL;
- afr_private_t * priv = NULL;
- afr_local_t *local = NULL;
- struct stat *unwind_buf = NULL;
+ int call_count = 0;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.st_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
+}
- unwind_buf->st_ino = local->cont.create.ino;
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf);
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
}
-
- return 0;
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
}
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
-int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf,
- struct stat *preparent, struct stat *postparent)
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
- int ret = 0;
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
+}
- child_index = (long) cookie;
+/* {{{ create */
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- ret = afr_fd_ctx_set (this, fd);
+int
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set ctx on fd=%p", fd);
+ local = frame->local;
- local->op_ret = -1;
- local->op_errno = -ret;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
}
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (create, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.create.fd,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
+ }
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ return 0;
+}
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
-
- local->cont.create.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.create.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- local->first_up_child);
+
+int
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = -1;
+
+ local = frame->local;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret > -1) {
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set ctx on fd=%p", fd);
+
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
}
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
}
- local->cont.create.inode = inode;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- local->success_count++;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ fd_ctx->flags = local->cont.create.flags;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
+ }
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+
+unlock:
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_create_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->create,
+ &local->loc,
+ local->cont.create.flags,
+ local->cont.create.mode,
+ local->umask,
+ local->cont.create.fd,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_create_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(create,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
-
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_create_wind;
+ local->transaction.done = afr_create_done;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -332,232 +511,201 @@ out:
int
afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.st_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->st_ino = local->cont.mknod.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mknod, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->first_up_child) {
- local->cont.mknod.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- }
-
- local->cont.mknod.inode = inode;
+ int call_count = -1;
+ int child_index = -1;
- local->success_count++;
- }
+ child_index = (long) cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_mknod_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_mknod_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(mknod,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_mknod_wind;
+ local->transaction.done = afr_mknod_done;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -568,234 +716,201 @@ out:
int
afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.st_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->st_ino = local->cont.mkdir.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mkdir, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
+ int call_count = -1;
+ int child_index = -1;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ child_index = (long) cookie;
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- local->cont.mkdir.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
+ call_count = afr_frame_return (frame);
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- if (child_index == local->first_up_child) {
- local->cont.mkdir.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- local->first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- }
-
- local->cont.mkdir.inode = inode;
+ return 0;
+}
- local->success_count++;
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mkdir,
+ &local->loc, local->cont.mkdir.mode,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
-int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
afr_mkdir_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(mkdir,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mkdir.mode = mode;
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.fop = afr_mkdir_wind;
+ local->transaction.done = afr_mkdir_done;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -806,222 +921,196 @@ out:
int
afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.st_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->st_ino = local->cont.link.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf);
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (link, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, struct stat *preparent,
- struct stat *postparent)
+afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
- int call_count = -1;
- int child_index = -1;
+ child_index = (long) cookie;
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ call_count = afr_frame_return (frame);
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
+ return 0;
+}
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- }
-
- local->cont.link.inode = inode;
+int
+afr_link_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- local->success_count++;
- }
+ local = frame->local;
+ priv = this->private;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ local->call_count = call_count;
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->link,
+ &local->loc,
+ &local->newloc, local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
-int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
afr_link_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+ priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ QUORUM_CHECK(link,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;