summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Anand Avati <avati@redhat.com>, 2014-01-16 16:14:36 -0800
committer: Vijay Bellur <vbellur@redhat.com>, 2014-03-22 05:25:57 -0700
commit: 6d3739292b7b51d2ddbab75b5f884fb38925b943 (patch)
tree: cf332a881a49c0904a7e023935750c2d080fc1c5
parent: eb87c96f49b3dd2c7460e58c54ce909c706cd475 (diff)
cluster/afr: refactor
- Remove client side self-healing completely (opendir, openfd, lookup)
- Re-work readdir-failover to work reliably in case of NFS
- Remove unused/dead lock recovery code
- Consistently use xdata in both calls and callbacks in all FOPs
- Per-inode event generation, used to force inode ctx refresh
- Implement dirty flag support (in place of pending counts)
- Eliminate inode ctx structure, use read subvol bits + event_generation
- Implement inode ctx refreshing based on event generation
- Provide backward compatibility in transactions
- remove unused variables and functions
- make code more consistent in style and pattern
- regularize and clean up inode-write transaction code
- regularize and clean up dir-write transaction code
- regularize and clean up common FOPs
- reorganize transaction framework code
- skip setting xattrs in pending dict if nothing is pending
- re-write self-healing code using syncops
- re-write simpler self-heal-daemon

Change-Id: I1e4080c9796c8a2815c2dab4be3073f389d614a8
BUG: 1021686
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/6010
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--libglusterfs/src/gf-dirent.c2
-rw-r--r--libglusterfs/src/glusterfs.h2
-rw-r--r--libglusterfs/src/xlator.c8
-rw-r--r--tests/basic/pump.t2
-rwxr-xr-xtests/bugs/859927/repl.t6
-rwxr-xr-xtests/bugs/bug-1015990-rep.t1
-rw-r--r--tests/bugs/bug-1035576.t5
-rwxr-xr-xtests/bugs/bug-1037501.t242
-rw-r--r--tests/bugs/bug-1058797.t2
-rwxr-xr-xtests/bugs/bug-767585-gfid.t7
-rwxr-xr-xtests/bugs/bug-802417.t20
-rwxr-xr-xtests/bugs/bug-830665.t8
-rwxr-xr-xtests/bugs/bug-853690.t8
-rwxr-xr-xtests/bugs/bug-865825.t12
-rwxr-xr-xtests/bugs/bug-873962.t13
-rw-r--r--tests/bugs/bug-888174.t15
-rw-r--r--tests/bugs/bug-906646.t2
-rw-r--r--tests/bugs/bug-913051.t4
-rw-r--r--tests/bugs/bug-913544.t2
-rw-r--r--tests/bugs/bug-918437-sh-mtime.t7
-rwxr-xr-xtests/bugs/bug-977797.t39
-rw-r--r--tests/volume.rc2
-rw-r--r--xlators/cluster/afr/src/Makefile.am21
-rw-r--r--xlators/cluster/afr/src/afr-common.c4223
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c629
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c1649
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c1055
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2609
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c509
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h6
-rw-r--r--xlators/cluster/afr/src/afr-open.c245
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c239
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c837
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h32
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3287
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h144
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2094
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c2787
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c969
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c457
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h162
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c2605
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h95
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c1457
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h26
-rw-r--r--xlators/cluster/afr/src/afr.c144
-rw-r--r--xlators/cluster/afr/src/afr.h823
-rw-r--r--xlators/cluster/afr/src/pump.c602
-rw-r--r--xlators/cluster/afr/src/pump.h3
-rw-r--r--xlators/cluster/dht/src/dht-common.c2
-rw-r--r--xlators/cluster/stripe/src/stripe.c2
-rw-r--r--xlators/features/index/src/index.c398
-rw-r--r--xlators/features/index/src/index.h14
53 files changed, 8915 insertions, 19619 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index bb028c9..0cda83a 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -83,6 +83,8 @@ gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
if (entry->inode) {
link_inode = inode_link (entry->inode, parent,
entry->d_name, &entry->d_stat);
+ if (!link_inode)
+ continue;
inode_lookup (link_inode);
inode_unref (link_inode);
}
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 31c46b7..5ce0d6e 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -132,7 +132,7 @@
/* Index xlator related */
#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
-#define GF_BASE_INDICES_HOLDER_GFID "glusterfs.base_indicies_holder_gfid"
+#define GF_XATTROP_INDEX_COUNT "glusterfs.xattrop_index_count"
#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
/* replace-brick and pump related internal xattrs */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index f3df8e2..1bded6d 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -680,7 +680,9 @@ loc_copy_overload_parent (loc_t *dst, loc_t *src, inode_t *parent)
dst->name = strrchr (dst->path, '/');
if (dst->name)
dst->name++;
- }
+ } else if (src->name) {
+ dst->name = src->name;
+ }
ret = 0;
out:
@@ -718,7 +720,9 @@ loc_copy (loc_t *dst, loc_t *src)
dst->name = strrchr (dst->path, '/');
if (dst->name)
dst->name++;
- }
+ } else if (src->name) {
+ dst->name = src->name;
+ }
ret = 0;
out:
diff --git a/tests/basic/pump.t b/tests/basic/pump.t
index 3faf06f..23bdc18 100644
--- a/tests/basic/pump.t
+++ b/tests/basic/pump.t
@@ -22,7 +22,7 @@ done
cd
TEST umount $M0
TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 start
-EXPECT_WITHIN 60 "Y" gd_is_replace_brick_completed $H0 $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+EXPECT_WITHIN 600 "Y" gd_is_replace_brick_completed $H0 $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 commit
TEST $CLI volume stop $V0
TEST diff -r --exclude=.glusterfs $B0/${V0}0 $B0/${V0}1
diff --git a/tests/bugs/859927/repl.t b/tests/bugs/859927/repl.t
index 73c86e7..856b057 100755
--- a/tests/bugs/859927/repl.t
+++ b/tests/bugs/859927/repl.t
@@ -33,20 +33,20 @@ TEST $CLI volume set $V0 cluster.data-self-heal-algorithm full
EXPECT full volume_option $V0 cluster.data-self-heal-algorithm
create_setup_for_self_heal $M0/a
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
-ls -l $file 2>&1 > /dev/null
+cat $file 2>&1 > /dev/null
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff
EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm
create_setup_for_self_heal $M0/a
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
-ls -l $file 2>&1 > /dev/null
+cat $file 2>&1 > /dev/null
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm
create_setup_for_self_heal $M0/a
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
-ls -l $file 2>&1 > /dev/null
+cat $file 2>&1 > /dev/null
TEST cmp $B0/${V0}1/a $B0/${V0}2/a
TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm ""
diff --git a/tests/bugs/bug-1015990-rep.t b/tests/bugs/bug-1015990-rep.t
index f59bb2f..bca0d7a 100755
--- a/tests/bugs/bug-1015990-rep.t
+++ b/tests/bugs/bug-1015990-rep.t
@@ -35,7 +35,6 @@ for i in {1..100}; do echo "STRING" > $M0/File$i; done
brick_2_sh_entries=$(count_sh_entries $B0/$V0"2")
brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")
-
command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1")
diff --git a/tests/bugs/bug-1035576.t b/tests/bugs/bug-1035576.t
index 52d93dd..938306a 100644
--- a/tests/bugs/bug-1035576.t
+++ b/tests/bugs/bug-1035576.t
@@ -34,7 +34,8 @@ quota_limit_val1=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}1/a)
quota_size_val1=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}1/a)
#Trigger entry,metadata self-heal
-TEST stat $M0/a
+TEST ls $M0/a
+
quota_limit_val0=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}0/a)
quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
@@ -43,7 +44,7 @@ TEST [ $quota_limit_val0 == $quota_limit_val1 ]
#Only entry, metadata self-heal is done quota size value should not be same
TEST [ $quota_size_val0 != $quota_size_val1 ]
-TEST stat $M0/a/f
+TEST cat $M0/a/f
#Now that data self-heal is done quota size value should be same
quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
diff --git a/tests/bugs/bug-1037501.t b/tests/bugs/bug-1037501.t
index d11c788..596122a 100755
--- a/tests/bugs/bug-1037501.t
+++ b/tests/bugs/bug-1037501.t
@@ -24,14 +24,6 @@ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
-## Make sure io-cache and write-behind don't interfere.
-TEST $CLI volume set $V0 data-self-heal off;
-
-## Make sure automatic self-heal doesn't perturb our results.
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-
-TEST $CLI volume set $V0 background-self-heal-count 0
-
## Start volume and verify
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
@@ -48,206 +40,38 @@ TEST $CLI volume add-brick $V0 replica 4 $H0:$B0/$V0-3 force
TEST $CLI volume add-brick $V0 replica 5 $H0:$B0/$V0-4 force
TEST $CLI volume add-brick $V0 replica 6 $H0:$B0/$V0-5 force
-sleep 10
-
-TEST ls $M0/
-
-
-function compare()
-{
- var=-1;
- if [ $1 == $2 ]; then
- var=0;
- else
- var=-1;
- fi
-
- echo $var
-}
-
-var2="000000000000000000000000"
-
-var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1| cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-
-var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-
-
-var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-3"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-4"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
-
-var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-5"`
-EXPECT "0" echo $?
-var3=`echo $var1 | cut -d x -f 2`
-EXPECT_NOT $var2 echo $var3
+sleep 5
+
+TEST gluster volume heal $V0 full
+
+sleep 5
+
+EXPECT 10 stat -c '%s' $B0/$V0-0/File
+EXPECT 10 stat -c '%s' $B0/$V0-1/File
+EXPECT 10 stat -c '%s' $B0/$V0-2/File
+EXPECT 10 stat -c '%s' $B0/$V0-3/File
+EXPECT 10 stat -c '%s' $B0/$V0-4/File
+EXPECT 10 stat -c '%s' $B0/$V0-5/File
+
+EXPECT 3 stat -c '%h' $B0/$V0-0/Link
+EXPECT 3 stat -c '%h' $B0/$V0-1/Link
+EXPECT 3 stat -c '%h' $B0/$V0-2/Link
+EXPECT 3 stat -c '%h' $B0/$V0-3/Link
+EXPECT 3 stat -c '%h' $B0/$V0-4/Link
+EXPECT 3 stat -c '%h' $B0/$V0-5/Link
+
+EXPECT 'directory' stat -c '%F' $B0/$V0-0/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-1/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-2/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-3/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-4/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-5/Dir
+
+EXPECT 'fifo' stat -c '%F' $B0/$V0-0/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-1/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-2/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-3/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-4/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-5/FIFO
cleanup;
diff --git a/tests/bugs/bug-1058797.t b/tests/bugs/bug-1058797.t
index 2b80794..1e9f09a 100644
--- a/tests/bugs/bug-1058797.t
+++ b/tests/bugs/bug-1058797.t
@@ -29,7 +29,7 @@ EXPECT "s" echo $setuid_bit1
#Restart volume and do lookup from mount to trigger heal
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
-TEST ls -l $M0/file
+TEST dd if=$M0/file of=/dev/null
#Get file permissions from healed brick1 and verify that S_ISUID is indeed set
file_permissions2=`ls -l $B0/brick1/file | awk '{print $1}' | cut -d. -f1 | cut -d- -f2,3,4,5,6`
diff --git a/tests/bugs/bug-767585-gfid.t b/tests/bugs/bug-767585-gfid.t
index 49cf742..41043a0 100755
--- a/tests/bugs/bug-767585-gfid.t
+++ b/tests/bugs/bug-767585-gfid.t
@@ -26,10 +26,9 @@ TEST setfattr -n trusted.gfid -v $gfid2 $B0/${V0}1/c
sleep 2
-cd $M0
-TEST ls -l a
-TEST ls -l b
-TEST ls -l c
+TEST stat $M0/a
+TEST stat $M0/b
+TEST stat $M0/c
TEST gf_get_gfid_xattr $B0/${V0}0/a
TEST gf_get_gfid_xattr $B0/${V0}1/a
diff --git a/tests/bugs/bug-802417.t b/tests/bugs/bug-802417.t
index 314141f..b596df3 100755
--- a/tests/bugs/bug-802417.t
+++ b/tests/bugs/bug-802417.t
@@ -55,7 +55,7 @@ EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
-TEST ls -l ${M0}/a_file
+TEST dd if=${M0}/a_file of=/dev/null
obs_path_0=${B0}/${V0}-0/a_file
@@ -67,31 +67,31 @@ tgt_xattr_1="trusted.afr.${V0}-client-1"
tgt_xattr_2="trusted.afr.${V0}-client-2"
actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2)
-EXPECT "0x000000020000000000000000" echo $actual
+EXPECT "0x000000030000000000000000" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_0)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_1)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_2)
-EXPECT "0x000000020000000000000000" echo $actual
+EXPECT "0x000000010000000000000000" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_0)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_1)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_2)
-EXPECT "0x000000000000000000000000" echo $actual
+EXPECT "0x000000000000000000000000|^\$" echo $actual
if [ "$EXIT_EARLY" = "1" ]; then
exit 0;
diff --git a/tests/bugs/bug-830665.t b/tests/bugs/bug-830665.t
index 0073ff1..dd6f3ce 100755
--- a/tests/bugs/bug-830665.t
+++ b/tests/bugs/bug-830665.t
@@ -81,15 +81,17 @@ ls -l $N0 &> /dev/null;
sleep 5;
## Force entry self-heal.
-find $N0 | xargs stat > /dev/null;
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+sleep 1
+TEST gluster volume heal $V0 full
#ls -lR $N0 > /dev/null;
## Do NOT check through the NFS mount here. That will force a new self-heal
## check, but we want to test whether self-heal already happened.
## Make sure everything's in order on the recreated brick.
-EXPECT 'test_data' cat $B0/${V0}-0/a_file;
-EXPECT 'more_test_data' cat $B0/${V0}-0/a_dir/another_file;
+EXPECT_WITHIN 20 'test_data' cat $B0/${V0}-0/a_file;
+EXPECT_WITHIN 20 'more_test_data' cat $B0/${V0}-0/a_dir/another_file;
if [ "$EXIT_EARLY" = "1" ]; then
exit 0;
diff --git a/tests/bugs/bug-853690.t b/tests/bugs/bug-853690.t
index 77a581f..c2f82d1 100755
--- a/tests/bugs/bug-853690.t
+++ b/tests/bugs/bug-853690.t
@@ -66,7 +66,6 @@ TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M
# file sizes and immediate split-brain (EIO).
TEST dd if=/dev/zero of=$M0/file bs=128k count=1
TEST dd if=$M0/file of=/dev/null bs=128k count=1
-
########
#
# Test self-heal with short writes...
@@ -76,14 +75,11 @@ TEST dd if=$M0/file of=/dev/null bs=128k count=1
# Cause a lookup and wait a few seconds for posterity. This self-heal also fails
# due to a short write.
TEST ls $M0/file
-
# Verify the attributes on the healthy replica do not reflect consistency with
# the other replica.
-TEST "getfattr -n trusted.afr.test-locks-0 $B0/test2/file --only-values > $B0/out1 2> /dev/null"
-TEST "getfattr -n trusted.afr.test-locks-1 $B0/test2/file --only-values > $B0/out2 2> /dev/null"
-TEST ! cmp $B0/out1 $B0/out2
+xa=`getfattr -n trusted.afr.test-locks-0 -e hex $B0/test2/file 2>&1 | grep = | cut -f2 -d=`
+EXPECT_NOT 0x000000000000000000000000 echo $xa
-TEST rm -f $B0/out1 $B0/out2
TEST rm -f $M0/file
TEST umount $M0
diff --git a/tests/bugs/bug-865825.t b/tests/bugs/bug-865825.t
index 6bb1c23..8ee7518 100755
--- a/tests/bugs/bug-865825.t
+++ b/tests/bugs/bug-865825.t
@@ -2,6 +2,8 @@
. $(dirname $0)/../include.rc
+cleanup;
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
@@ -28,6 +30,7 @@ EXPECT 'Created' volinfo_field $V0 'Status';
## Make sure io-cache and write-behind don't interfere.
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.quick-read off;
TEST $CLI volume set $V0 performance.write-behind off;
TEST $CLI volume set $V0 performance.stat-prefetch off
@@ -54,19 +57,18 @@ setfattr -n trusted.afr.${V0}-client-2 -v $value $B0/${V0}-0/a_file
setfattr -x trusted.afr.${V0}-client-2 $B0/${V0}-1/a_file
echo "wrong_data" > $B0/${V0}-2/a_file
-## Remount and force a self-heal.
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
-stat ${M0}/a_file > /dev/null
+gluster volume set $V0 cluster.self-heal-daemon on
+sleep 3
+gluster volume heal $V0 full
## Make sure brick 2 now has the correct contents.
-EXPECT "test_data" cat $B0/${V0}-2/a_file
+EXPECT_WITHIN 30 "test_data" cat $B0/${V0}-2/a_file
if [ "$EXIT_EARLY" = "1" ]; then
exit 0;
fi
## Finish up
-TEST umount $M0;
TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
diff --git a/tests/bugs/bug-873962.t b/tests/bugs/bug-873962.t
index b245cc3..0281417 100755
--- a/tests/bugs/bug-873962.t
+++ b/tests/bugs/bug-873962.t
@@ -61,11 +61,12 @@ EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
#Files are in split-brain, so open should fail
TEST ! cat $M0/a;
TEST ! cat $M1/a;
-TEST ! cat $M0/b;
-TEST ! cat $M1/b;
+TEST cat $M0/b;
+TEST cat $M1/b;
#Reset split-brain status
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
@@ -75,6 +76,7 @@ TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0
EXPECT "2" cat $M0/a;
# FAIL HERE - see comment about cluster.self-heal-background-count above.
EXPECT "2" cat $M1/a;
+TEST dd if=$M0/b of=/dev/null bs=1M
EXPECT "def" getfattr -n trusted.mdata --only-values $M0/b 2>/dev/null
EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null
@@ -90,8 +92,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $
#Files are in split-brain, so open should fail
TEST ! cat $M0/c
TEST ! cat $M1/c
-TEST ! cat $M0/d
-TEST ! cat $M1/d
+TEST cat $M0/d
+TEST cat $M1/d
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
@@ -102,7 +104,4 @@ EXPECT "2" cat $M1/c
EXPECT "1" cat $M0/d
EXPECT "1" cat $M1/d
-#Check that the self-heal is not triggered.
-EXPECT "1" cat $B0/${V0}1/c
-EXPECT "abc" getfattr -n trusted.mdata --only-values $B0/${V0}1/d 2>/dev/null
cleanup;
diff --git a/tests/bugs/bug-888174.t b/tests/bugs/bug-888174.t
index 4ea3464..ef653f7 100644
--- a/tests/bugs/bug-888174.t
+++ b/tests/bugs/bug-888174.t
@@ -38,10 +38,9 @@ TEST [ -z $inodelk_max_latency ]
TEST dd of=$M0/a if=/dev/urandom bs=1M count=10 conv=fsync
#Check for no trace of pending changelog. Flush should make sure of it.
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-0
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-1
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-0
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
+
dd of=$M0/a if=/dev/urandom bs=1M count=1024 2>/dev/null &
p=$!
@@ -51,15 +50,13 @@ TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
-kill -SIGTERM $p
+kill -TERM $p
#wait for dd to exit
wait > /dev/null 2>&1
#Goal is to check if there is permanent FOOL changelog
sleep 5
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-0
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-1
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-0
-EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
cleanup;
diff --git a/tests/bugs/bug-906646.t b/tests/bugs/bug-906646.t
index 0e6a3bc..b2cbf6b 100644
--- a/tests/bugs/bug-906646.t
+++ b/tests/bugs/bug-906646.t
@@ -84,7 +84,7 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 `expr $brick_id - 1`
-stat $pth
+cat $pth >/dev/null
# check backends - xattr should not be present anywhere
EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
diff --git a/tests/bugs/bug-913051.t b/tests/bugs/bug-913051.t
index 69e90cf..9a59424 100644
--- a/tests/bugs/bug-913051.t
+++ b/tests/bugs/bug-913051.t
@@ -48,8 +48,8 @@ EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b
#attempt self-heal so that the files are created on brick-0
-TEST ls -l $M0/dir/a
-TEST ls -l $M0/dir/b
+TEST dd if=$M0/dir/a of=/dev/null bs=1M
+TEST dd if=$M0/dir/b of=/dev/null bs=1M
#trigger writev for attempting open-fd-fix in afr
TEST fd_write $wfd "open sesame"
diff --git a/tests/bugs/bug-913544.t b/tests/bugs/bug-913544.t
index 790bc08..db28ca8 100644
--- a/tests/bugs/bug-913544.t
+++ b/tests/bugs/bug-913544.t
@@ -17,7 +17,7 @@ TEST touch a
#simulate no-changelog data split-brain
echo "abc" > $B0/${V0}1/a
echo "abcd" > $B0/${V0}0/a
-TEST ! truncate -s 0 a
+TEST truncate -s 0 a
TEST ls
cd
diff --git a/tests/bugs/bug-918437-sh-mtime.t b/tests/bugs/bug-918437-sh-mtime.t
index 080956f..11155ad 100644
--- a/tests/bugs/bug-918437-sh-mtime.t
+++ b/tests/bugs/bug-918437-sh-mtime.t
@@ -38,7 +38,12 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
-find $M0 | xargs stat 1>/dev/null
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+sleep 1
+TEST gluster volume heal $V0 full
+
+size=`stat -c '%s' /etc/passwd`
+EXPECT_WITHIN 60 $size stat -c '%s' $B0/gfs0/brick01/a
TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a)
TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a)
diff --git a/tests/bugs/bug-977797.t b/tests/bugs/bug-977797.t
index 08cdbe8..f225215 100755
--- a/tests/bugs/bug-977797.t
+++ b/tests/bugs/bug-977797.t
@@ -54,7 +54,7 @@ TEST chmod 757 $M0/a/file
TEST $CLI volume start $V0 force
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
-TEST ls -l $M0/a/file
+TEST dd if=$M0/a/file of=/dev/null bs=1M
b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
trusted.afr.$V0-client-0 "entry")
@@ -75,34 +75,15 @@ b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
trusted.afr.$V0-client-1 "data")
-EXPECT "00000000" echo $b1c0f
-EXPECT "00000000" echo $b1c1f
-EXPECT "00000000" echo $b2c0f
-EXPECT "00000000" echo $b2c1f
-
-EXPECT "00000000" echo $b1c0dir
-EXPECT "00000000" echo $b1c1dir
-EXPECT "00000000" echo $b2c0dir
-EXPECT "00000000" echo $b2c1dir
-
-contains() {
- string="$1"
- substring="$2"
- var="-1"
- if test "${string#*$substring}" != "$string"
- then
- var="0" # $substring is in $string
- else
- var="1" # $substring is not in $string
- fi
- echo $var
-}
-
-var1=$(cat $M0/a/file 2>&1)
-var2="Input/output error"
-
-
-EXPECT "0" contains "$var1" "$var2"
+EXPECT "00000000|^$" echo $b1c0f
+EXPECT "00000000|^$" echo $b1c1f
+EXPECT "00000000|^$" echo $b2c0f
+EXPECT "00000000|^$" echo $b2c1f
+
+EXPECT "00000000|^$" echo $b1c0dir
+EXPECT "00000000|^$" echo $b1c1dir
+EXPECT "00000000|^$" echo $b2c0dir
+EXPECT "00000000|^$" echo $b2c1dir
## Finish up
TEST $CLI volume stop $V0;
diff --git a/tests/volume.rc b/tests/volume.rc
index 5e2f95e..9a06687 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -169,7 +169,7 @@ function check_option_help_presence {
function afr_get_changelog_xattr {
local file=$1
local xkey=$2
- getfattr -n $xkey -e hex $file 2>/dev/null | grep "client-" | cut -f2 -d'='
+ getfattr -n $xkey -e hex $file 2>/dev/null | grep "$xkey" | cut -f2 -d'='
}
function afr_get_pending_heal_count {
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 35d18a6..ea5a90a 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -2,24 +2,26 @@ xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
- afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
- afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
- afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
$(top_builddir)/xlators/lib/src/libxlator.c
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
+
afr_la_LDFLAGS = -module -avoid-version
-afr_la_SOURCES = $(afr_common_source) afr.c
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
pump_la_LDFLAGS = -module -avoid-version
-pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
- afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
- afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
- afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
- $(top_builddir)/glusterfsd/src/glusterfsd.h
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h pump.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
@@ -31,7 +33,6 @@ CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 224d305..2bab0f8 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,787 +45,797 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heald.h"
-#include "pump.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-#define AFR_STATISTICS_HISTORY_SIZE 50
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
-{
- int i = 0;
- for (i = 0; i < child_count; i++)
- dst[i] = src[i];
-}
-
-void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+call_frame_t *
+afr_copy_frame (call_frame_t *base)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
- priv = this->private;
+ frame = copy_frame (base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY (frame);
+ return NULL;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
- ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
- "lookup", path);
- }
+ return frame;
}
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<------------------- 64bit -------------------->|
+ * 63               32 31          16 15            0
+ * |    EVENT_GEN     |     DATA     |   METADATA   |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e. priv->event_generation)
+ * when DATA and METADATA were last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
+ */
+
int
-afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
- dict_t *xattr_req, loc_t *loc, void **gfid_req)
+__afr_inode_read_subvol_get_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
- GF_ASSERT (gfid_req);
+ priv = this->private;
- *gfid_req = NULL;
- local->xattr_req = dict_new ();
- if (!local->xattr_req)
- goto out;
- if (xattr_req)
- dict_copy (xattr_req, local->xattr_req);
+ ret = __inode_ctx_get (inode, this, &val);
+ if (ret < 0)
+ return ret;
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
- ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_PARENT_ENTRYLK);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
- ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to get the gfid from dict", loc->path);
- *gfid_req = NULL;
- } else {
- if (loc->parent != NULL)
- dict_del (local->xattr_req, "gfid-req");
- }
- ret = 0;
-out:
- return ret;
+ if (event_p)
+ *event_p = event;
+ return ret;
}
-void
-afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
-{
- inode_t *inode = NULL;
-
- inode = loc->inode;
- if (inode && !uuid_is_null (inode->gfid))
- uuid_copy (dst, inode->gfid);
- else if (!uuid_is_null (loc->gfid))
- uuid_copy (dst, loc->gfid);
- else if (new && !uuid_is_null (new))
- uuid_copy (dst, new);
-}
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno)
-{
- int i = 0;
- int errno_count = 0;
- int child = 0;
+__afr_inode_read_subvol_set_small (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
+{
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (child_errno[child] == op_errno)
- errno_count++;
- }
- return errno_count;
-}
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
+ return __inode_ctx_set (inode, this, &val);
+}
- GF_ASSERT (gfid);
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
- }
+int
+__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
- uuid_copy (*pgfid, gfid);
+ ret = __inode_ctx_get (inode, this, &val);
+ (void) ret;
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+ metadatamap = (val & 0x000000000000ffff) >> 0;
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = 0;
-out:
- if (ret && pgfid)
- GF_FREE (pgfid);
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
- return ret;
+ return __inode_ctx_set (inode, this, &val);
}
-void
-afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
-{
- if (!ctx)
- return;
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-}
-afr_inode_ctx_t*
-__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+int
+__afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
{
- int ret = 0;
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- if (ctx_addr != 0) {
- ctx = (afr_inode_ctx_t*) (long) ctx_addr;
- goto out;
- }
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto fail;
- ctx->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*ctx->fresh_children),
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto fail;
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- goto fail;
- }
+ priv = this->private;
-out:
- return ctx;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small (inode, this, data,
+ metadata, event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
-fail:
- afr_inode_ctx_destroy (ctx);
- return NULL;
+ return ret;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+
+int
+__afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- LOCK (&inode->lock);
- {
- ctx = __afr_inode_ctx_get (inode, this);
- }
- UNLOCK (&inode->lock);
- return ctx;
+ priv = this->private;
+
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small (inode, this, data,
+ metadata, event);
+ else
+ ret = -1;
+
+ return ret;
}
-void
-afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
- afr_inode_params_t *params)
+
+int
+__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
- GF_ASSERT (inode);
- GF_ASSERT (params);
+ afr_private_t *priv = NULL;
+ int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ priv = this->private;
- priv = this->private;
- LOCK (&inode->lock);
- {
- ctx = __afr_inode_ctx_get (inode, this);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_GET_READ_CTX:
- fresh_children = params->u.read_ctx.children;
- read_child = (int32_t)(ctx->masks &
- AFR_ICTX_READ_CHILD_MASK);
- params->u.read_ctx.read_child = read_child;
- if (!fresh_children)
- goto unlock;
- for (i = 0; i < priv->child_count; i++)
- fresh_children[i] = ctx->fresh_children[i];
- break;
- case AFR_INODE_GET_OPENDIR_DONE:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
- params->u.value = _gf_true;
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- }
-unlock:
- UNLOCK (&inode->lock);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_reset_small (inode, this);
+ else
+ ret = -1;
+
+ return ret;
}
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+
+int
+afr_inode_read_subvol_get (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_inode_ctx_t *ctx = NULL;
- gf_boolean_t spb = _gf_false;
+ int ret = -1;
- ctx = afr_inode_ctx_get (inode, this);
- if (!ctx)
- goto out;
- if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
- spb = _gf_true;
-out:
- return spb;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get (inode, this, data,
+ metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+
+int
+afr_inode_read_subvol_set (inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- afr_inode_params_t params = {0};
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set (inode, this, data, metadata,
+ event);
+ }
+ UNLOCK(&inode->lock);
- params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx_params (this, inode, &params);
- return params.u.value;
+ return ret;
}
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+
+int
+afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
{
- afr_inode_params_t params = {0};
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_reset (inode, this);
+ }
+ UNLOCK(&inode->lock);
- params.op = AFR_INODE_GET_READ_CTX;
- params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx_params (this, inode, &params);
- return params.u.read_ctx.read_child;
+ return ret;
}
-void
-afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
- remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
- mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = remaining_mask | mask;
-}
+int
+afr_accused_fill (xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type (type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xdata, priv->pending_key[i],
+ &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy (pending, pending_raw, sizeof(pending));
+
+ if (ntoh32 (pending[idx]))
+ accused[i] = 1;
+ }
-void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
-{
- int i = 0;
-
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if (fresh_children)
- ctx->fresh_children[i] = fresh_children[i];
- else
- ctx->fresh_children[i] = -1;
- }
+ return 0;
}
-void
-afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
- int32_t child_count)
+
+int
+afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
{
- int i = 0;
- int32_t read_child = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
- GF_ASSERT (stale_children);
- for (i = 0; i < child_count; i++) {
- if (stale_children[i] == -1)
- break;
- afr_children_rm_child (ctx->fresh_children,
- stale_children[i], child_count);
- }
- read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
- if (!afr_is_child_present (ctx->fresh_children, child_count,
- read_child))
- afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
-}
+ priv = this->private;
-void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
-{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
- remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = remaining_mask | mask;
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
+
+ return 0;
}
-void
-afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
- afr_inode_params_t *params)
-{
- GF_ASSERT (inode);
- GF_ASSERT (params);
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- int32_t *stale_children = NULL;
+int
+afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
- priv = this->private;
- LOCK (&inode->lock);
- {
- ctx = __afr_inode_ctx_get (inode, this);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_SET_READ_CTX:
- read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.children;
- afr_inode_ctx_set_read_ctx (ctx, read_child,
- fresh_children,
- priv->child_count);
- break;
- case AFR_INODE_RM_STALE_CHILDREN:
- stale_children = params->u.read_ctx.children;
- afr_inode_ctx_rm_stale_children (ctx,
- stale_children,
- priv->child_count);
- break;
- case AFR_INODE_SET_OPENDIR_DONE:
- afr_inode_ctx_set_opendir_done (ctx);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- }
-unlock:
- UNLOCK (&inode->lock);
-}
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_accused = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
- afr_spb_state_t data_spb)
-{
- afr_inode_ctx_t *ctx = NULL;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ if (replies[i].op_ret == -1) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ afr_accused_fill (this, replies[i].xdata, data_accused,
+ (inode->ia_type == IA_IFDIR) ?
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill (this, replies[i].xdata,
+ metadata_accused, AFR_METADATA_TRANSACTION);
+
+ }
- ctx = afr_inode_ctx_get (inode, this);
- if (mdata_spb != DONT_KNOW)
- ctx->mdata_spb = mdata_spb;
- if (data_spb != DONT_KNOW)
- ctx->data_spb = data_spb;
+ if (inode->ia_type != IA_IFDIR)
+ afr_accuse_smallfiles (this, replies, data_accused);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+
+ afr_inode_read_subvol_set (inode, this, data_readable,
+ metadata_readable, event_generation);
+ return ret;
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
-{
- afr_inode_params_t params = {0};
- params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx_params (this, inode, &params);
+
+int
+afr_refresh_selfheal_done (int ret, call_frame_t *heal, void *opaque)
+{
+ if (heal)
+ STACK_DESTROY (heal->root);
+ return 0;
}
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children)
+int
+afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
{
- afr_inode_params_t params = {0};
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
- priv = this->private;
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (fresh_children);
- GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
- read_child));
-
- params.op = AFR_INODE_SET_READ_CTX;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx_params (this, inode, &params);
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
+
+ err = afr_final_errno (local, priv);
+ret:
+ return -err;
}
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
- int32_t *stale_children)
+
+int
+afr_refresh_selfheal_wrap (void *opaque)
{
- afr_inode_params_t params = {0};
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int err = 0;
+
+ local = frame->local;
+ this = frame->this;
- GF_ASSERT (stale_children);
+ afr_selfheal (frame->this, local->refreshinode->gfid);
- params.op = AFR_INODE_RM_STALE_CHILDREN;
- params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx_params (this, inode, &params);
+ afr_selfheal_unlocked_discover (frame, local->refreshinode,
+ local->refreshinode->gfid,
+ local->replies);
+
+ afr_replies_interpret (frame, this, local->refreshinode);
+
+ err = afr_inode_refresh_err (frame, this);
+
+ afr_replies_wipe (local, this->private);
+
+ local->refreshfn (frame, this, err);
+
+ return 0;
}
+
gf_boolean_t
-afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+afr_selfheal_enabled (xlator_t *this)
{
- gf_boolean_t source_xattrs = _gf_false;
+ afr_private_t *priv = NULL;
+ gf_boolean_t data = _gf_false;
- GF_ASSERT (child < child_count);
+ priv = this->private;
- if ((child >= 0) && (child < child_count) &&
- sources[child]) {
- source_xattrs = _gf_true;
- }
- return source_xattrs;
+ gf_string2boolean (priv->data_self_heal, &data);
+
+ return data || priv->metadata_self_heal || priv->entry_self_heal;
}
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child)
+
+
+int
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
- gf_boolean_t success_child = _gf_false;
- int i = 0;
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int err = 0;
- GF_ASSERT (child < child_count);
+ local = frame->local;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (child == success_children[i]) {
- success_child = _gf_true;
- break;
- }
- }
- return success_child;
+ ret = afr_replies_interpret (frame, this, local->refreshinode);
+
+ err = afr_inode_refresh_err (frame, this);
+
+ afr_replies_wipe (local, this->private);
+
+ if (ret && afr_selfheal_enabled (this)) {
+ heal = copy_frame (frame);
+ if (heal)
+ heal->root->pid = -1;
+ ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto refresh_done;
+ } else {
+ refresh_done:
+ local->refreshfn (frame, this, err);
+ }
+
+ return 0;
}
-gf_boolean_t
-afr_is_read_child (int32_t *success_children, int32_t *sources,
- int32_t child_count, int32_t child)
+
+int
+afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *par)
{
- gf_boolean_t success_child = _gf_false;
- gf_boolean_t source = _gf_false;
+ afr_local_t *local = NULL;
+ int call_child = (long) cookie;
+ int call_count = 0;
- if (child < 0) {
- return _gf_false;
- }
+ local = frame->local;
- GF_ASSERT (success_children);
- GF_ASSERT (child_count > 0);
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ local->replies[call_child].postparent = *par;
+ local->replies[call_child].xdata = dict_ref (xdata);
+ }
- success_child = afr_is_child_present (success_children, child_count,
- child);
- if (!success_child)
- goto out;
- if (NULL == sources) {
- source = _gf_true;
- goto out;
- }
- source = afr_is_source_child (sources, child_count, child);
-out:
- return (success_child && source);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_inode_refresh_done (frame, this);
+
+ return 0;
}
-int32_t
-afr_hash_child (int32_t *success_children, int32_t child_count,
- unsigned int hmode, uuid_t gfid)
+
+int
+afr_inode_refresh_subvol (call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, dict_t *xdata)
{
- uuid_t gfid_copy = {0,};
- pid_t pid;
+ loc_t loc = {0, };
+ afr_private_t *priv = NULL;
- if (!hmode) {
- return -1;
- }
+ priv = this->private;
- if (gfid) {
- uuid_copy(gfid_copy,gfid);
- }
- if (hmode > 1) {
- /*
- * Why getpid? Because it's one of the cheapest calls
- * available - faster than gethostname etc. - and returns a
- * constant-length value that's sure to be shorter than a UUID.
- * It's still very unlikely to be the same across clients, so
- * it still provides good mixing. We're not trying for
- * perfection here. All we need is a low probability that
- * multiple clients won't converge on the same subvolume.
- */
- pid = getpid();
- memcpy (gfid_copy, &pid, sizeof(pid));
- }
+ loc.inode = inode;
+ uuid_copy (loc.gfid, inode->gfid);
- return SuperFastHash((char *)gfid_copy,
- sizeof(gfid_copy)) % child_count;
+ STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
}
-/* If sources is NULL the xattrs are assumed to be of source for all
- * success_children.
- */
+
int
-afr_select_read_child_from_policy (int32_t *success_children,
- int32_t child_count, int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources,
- unsigned int hmode, uuid_t gfid)
+afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
{
- int32_t read_child = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t *xdata = NULL;
- GF_ASSERT (success_children);
+ priv = this->private;
+ local = frame->local;
- read_child = config_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ afr_replies_wipe (local, priv);
- read_child = prev_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ xdata = dict_new ();
+ if (!xdata) {
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
- read_child = afr_hash_child (success_children, child_count,
- hmode, gfid);
- if (afr_is_read_child (success_children, sources, child_count,
- read_child)) {
- goto out;
- }
+ if (afr_xattr_req_prepare (this, xdata) != 0) {
+ dict_unref (xdata);
+ afr_inode_refresh_done (frame, this);
+ return 0;
+ }
- for (i = 0; i < child_count; i++) {
- read_child = success_children[i];
- if (read_child < 0)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
+ local->call_count = AFR_COUNT (local->child_up, priv->child_count);
-out:
- return read_child;
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ afr_inode_refresh_subvol (frame, this, i, local->refreshinode,
+ xdata);
+
+ if (!--call_count)
+ break;
+ }
+
+ dict_unref (xdata);
+
+ return 0;
}
-/* This function should be used when all the success_children are sources
- */
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child, uuid_t gfid)
+
+int
+afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_inode_refresh_cbk_t refreshfn)
{
- int read_child = -1;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- read_child = afr_select_read_child_from_policy (fresh_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- NULL,
- priv->hash_mode, gfid);
- if (read_child >= 0)
- afr_inode_set_read_ctx (this, inode, read_child,
- fresh_children);
+ local = frame->local;
+
+ local->refreshfn = refreshfn;
+
+ if (local->refreshinode) {
+ inode_unref (local->refreshinode);
+ local->refreshinode = NULL;
+ }
+
+ local->refreshinode = inode_ref (inode);
+
+ afr_inode_refresh_do (frame, this);
+
+ return 0;
}
-/* afr_next_call_child ()
- * This is a common function used by all the read-type fops
- * This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child)
+
+int
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req)
{
- int next_index = 0;
- int32_t next_call_child = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- GF_ASSERT (last_index);
+ priv = this->private;
- next_index = *last_index;
-retry:
- next_index++;
- if ((next_index >= child_count) ||
- (fresh_children[next_index] == -1))
- goto out;
- if ((fresh_children[next_index] == read_child) ||
- (!child_up[fresh_children[next_index]]))
- goto retry;
- *last_index = next_index;
- next_call_child = fresh_children[next_index];
-out:
- return next_call_child;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value for %s",
+ priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64 (xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to set dirty "
+ "query flag");
+ }
+
+ return ret;
}
- /* This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index)
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
-
- GF_ASSERT (child_up);
- GF_ASSERT (call_child);
- GF_ASSERT (last_index);
- GF_ASSERT (fresh_children);
+ int ret = -ENOMEM;
- if (read_child < 0) {
- ret = -EIO;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
goto out;
- }
- priv = this->private;
- *call_child = -1;
- *last_index = -1;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
- if (child_up[read_child]) {
- *call_child = read_child;
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (fresh_children[i] == -1)
- break;
- if (child_up[fresh_children[i]]) {
- *call_child = fresh_children[i];
- ret = 0;
- break;
- }
- }
+ ret = afr_xattr_req_prepare (this, local->xattr_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to prepare xattr_req", loc->path);
+ }
- if (*call_child == -1) {
- ret = -ENOTCONN;
- goto out;
- }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
- *last_index = i;
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
}
+
+ ret = 0;
out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
- "last_index: %d", ret, *call_child, *last_index);
return ret;
}
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+
+int
+afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
{
- unsigned int i = 0;
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
- if (!xattr)
- goto out;
- for (i = 0; i < child_count; i++) {
- if (xattr[i]) {
- dict_unref (xattr[i]);
- xattr[i] = NULL;
- }
+ if (!hashmode) {
+ return -1;
}
-out:
- return;
-}
-void
-afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
-{
- afr_reset_xattr (xattr, child_count);
- GF_FREE (xattr);
-}
+ if (inode) {
+ uuid_copy (gfid_copy, inode->gfid);
+ }
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ if (hashmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients will converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
- sh = &local->self_heal;
- priv = this->private;
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
- if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
- GF_FREE (sh->data_sh_info);
- if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
- GF_FREE (sh->metadata_sh_info);
+int
+afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
+ unsigned char *readable)
+{
+ afr_private_t *priv = NULL;
+ int read_subvol = -1;
+ int i = 0;
- GF_FREE (sh->buf);
+ priv = this->private;
- GF_FREE (sh->parentbufs);
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
- if (sh->inode)
- inode_unref (sh->inode);
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child (inode, priv->child_count,
+ priv->hash_mode);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
- afr_xattr_array_destroy (sh->xattr, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
- GF_FREE (sh->child_errno);
+ /* no readable subvolumes, either split brain or all subvols down */
- afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
- afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+ return -1;
+}
- GF_FREE (sh->sources);
- GF_FREE (sh->success);
+int
+afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p,
+ int type)
+{
+ int ret = -1;
- GF_FREE (sh->locked_nodes);
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get (inode, this, 0, readable,
+ event_p);
+ else
+ ret = afr_inode_read_subvol_get (inode, this, readable, 0,
+ event_p);
+ return ret;
+}
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
- GF_FREE ((char *)sh->linkname);
+int
+afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
+ int *event_p, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
- GF_FREE (sh->success_children);
+ priv = this->private;
- GF_FREE (sh->fresh_children);
+ readable = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+ intersection = alloca0 (priv->child_count);
- GF_FREE (sh->fresh_parent_dirs);
+ afr_inode_read_subvol_type_get (inode, this, readable, &event, type);
- loc_wipe (&sh->parent_loc);
- loc_wipe (&sh->lookup_loc);
+ afr_inode_read_subvol_get (inode, this, data_readable, metadata_readable,
+ &event);
- GF_FREE (sh->checksum);
+ AFR_INTERSECT (intersection, data_readable, metadata_readable,
+ priv->child_count);
- GF_FREE (sh->write_needed);
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
+ if (AFR_COUNT (intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy (inode, this,
+ intersection);
+ else
+ subvol = afr_read_subvol_select_by_policy (inode, this,
+ readable);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ return subvol;
}
@@ -838,8 +848,6 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
priv = this->private;
afr_matrix_cleanup (local->pending, priv->child_count);
- afr_matrix_cleanup (local->transaction.txn_changelog,
- priv->child_count);
GF_FREE (local->internal_lock.locked_nodes);
@@ -860,7 +868,25 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.parent_loc);
loc_wipe (&local->transaction.new_parent_loc);
- GF_FREE (local->transaction.postop_piggybacked);
+}
+
+
+void
+afr_replies_wipe (afr_local_t *local, afr_private_t *priv)
+{
+ int i;
+
+ if (!local->replies)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].xdata) {
+ dict_unref (local->replies[i].xdata);
+ local->replies[i].xdata = NULL;
+ }
+ }
+
+ memset (local->replies, 0, sizeof(*local->replies) * priv->child_count);
}
@@ -872,7 +898,7 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (!local)
return;
- afr_local_sh_cleanup (local, this);
+ syncbarrier_destroy (&local->barrier);
afr_local_transaction_cleanup (local, this);
@@ -890,40 +916,26 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->dict)
dict_unref (local->dict);
+ afr_replies_wipe (local, priv);
GF_FREE(local->replies);
GF_FREE (local->child_up);
- GF_FREE (local->child_errno);
+ GF_FREE (local->read_attempted);
- GF_FREE (local->fresh_children);
+ GF_FREE (local->readable);
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- afr_reset_xattr (local->cont.lookup.xattrs,
- priv->child_count);
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
-
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
+ if (local->parent)
+ inode_unref (local->parent);
- GF_FREE (local->cont.lookup.postparents);
+ if (local->parent2)
+ inode_unref (local->parent2);
- GF_FREE (local->cont.lookup.bufs);
-
- GF_FREE (local->cont.lookup.success_children);
-
- GF_FREE (local->cont.lookup.sources);
- afr_matrix_cleanup (local->cont.lookup.pending_matrix,
- priv->child_count);
- }
+ if (local->refreshinode)
+ inode_unref (local->refreshinode);
{ /* getxattr */
GF_FREE (local->cont.getxattr.name);
@@ -1018,67 +1030,29 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-int
-afr_set_elem_count_get (unsigned char *elems, int child_count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (elems[i])
- ret++;
- return ret;
-}
-
-/**
- * up_children_count - return the number of children that are up
- */
-
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count)
-{
- return afr_set_elem_count_get (child_up, child_count);
-}
-
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count)
-{
- return afr_set_elem_count_get (children, child_count);
-}
-
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count)
-{
- return afr_set_elem_count_get (pre_op, child_count);
-}
gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
-{
- uint64_t ctx = 0;
- int32_t ret = 0;
-
- GF_ASSERT (loc);
- GF_ASSERT (this);
- GF_ASSERT (loc->inode);
+afr_is_entry_possibly_under_txn (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32 (local->replies[i].xdata,
+ GLUSTERFS_PARENT_ENTRYLK,
+ &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (0 == ret)
- return _gf_false;
- return _gf_true;
+ return _gf_false;
}
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
-
- uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- uuid_copy (loc->pargfid, postparent->ia_gfid);
-}
/*
* Quota size xattrs are not maintained by afr. There is a
@@ -1090,1467 +1064,845 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
* */
static void
-afr_handle_quota_size (afr_local_t *local, xlator_t *this,
- dict_t *rsp_dict)
+afr_handle_quota_size (call_frame_t *frame, xlator_t *this)
{
- int32_t *sources = NULL;
- dict_t *xattr = NULL;
- data_t *max_data = NULL;
- int64_t max_quota_size = -1;
- data_t *data = NULL;
- int64_t *size = NULL;
- int64_t quota_size = -1;
- afr_private_t *priv = NULL;
- int i = 0;
- int ret = -1;
- gf_boolean_t source_present = _gf_false;
-
- priv = this->private;
- sources = local->cont.lookup.sources;
-
- if (rsp_dict == NULL) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
- "response dictionary", local->loc.path);
- return;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- source_present = _gf_true;
- break;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- /*
- * If there is at least one source lets check
- * for maximum quota sizes among sources, otherwise take the
- * maximum of the ones present to be on the safer side.
- */
- if (source_present && !sources[i])
- continue;
-
- xattr = local->cont.lookup.xattrs[i];
- if (!xattr)
- continue;
-
- data = dict_get (xattr, QUOTA_SIZE_KEY);
- if (!data)
- continue;
-
- size = (int64_t*)data->data;
- quota_size = ntoh64(*size);
- gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
- local->loc.path, i, quota_size);
- if (quota_size > max_quota_size) {
- if (max_data)
- data_unref (max_data);
-
- max_quota_size = quota_size;
- max_data = data_ref (data);
- }
- }
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ uint64_t size = 0;
+ uint64_t max_size = 0;
+ int readable_cnt = 0;
- if (max_data) {
- ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "quota size", local->loc.path);
- }
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT (readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ if (dict_get_uint64 (replies[i].xdata, QUOTA_SIZE_KEY, &size))
+ continue;
+ if (size > max_size)
+ max_size = size;
+ }
- data_unref (max_data);
- }
+ if (!max_size)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ if (dict_set_uint64 (replies[i].xdata, QUOTA_SIZE_KEY, max_size))
+ continue;
+ }
}
-int
-afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
-{
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- dict_t **xattr = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
- int ret = 0;
- int i = 0;
-
- GF_ASSERT (local);
-
- buf = &local->cont.lookup.buf;
- postparent = &local->cont.lookup.postparent;
- xattr = &local->cont.lookup.xattr;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- local->fresh_children);
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- success_children = local->cont.lookup.success_children;
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_children_intersection_get (local->fresh_children, success_children,
- sources, priv->child_count);
- if (!sources[read_child]) {
- read_child = -1;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- read_child = i;
- break;
- }
- }
- }
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
- read_child);
- if (!*xattr)
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
-
- *buf = local->cont.lookup.bufs[read_child];
- *postparent = local->cont.lookup.postparents[read_child];
-
- if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
- afr_handle_quota_size (local, this, *xattr);
-
- if (IA_INVAL == local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type = buf->ia_type;
- }
-out:
- return ret;
-}
static void
-afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = -1;
- uint32_t parent_entrylk = 0;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
- GF_ASSERT (child_index >= 0);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ unsigned char *readable = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {0, };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
- ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
- &parent_entrylk);
- if (!ret)
- local->cont.lookup.parent_entrylk += parent_entrylk;
-}
+ locked_entry = afr_is_entry_possibly_under_txn (local, this);
-/*
- * It's important to maintain a commutative property on do_*_self_heal and
- * found*; once set, they must not be cleared by a subsequent iteration or
- * call, so that they represent a logical OR of all iterations and calls
- * regardless of child/key order. That allows the caller to call us multiple
- * times without having to use a separate variable as a "reduce" accumulator.
- */
-static void
-afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- int i = 0;
- int ret = -1;
- void *pending_raw = NULL;
- int32_t *pending = NULL;
+ readable = alloca0 (priv->child_count);
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
+ afr_inode_read_subvol_get (local->loc.parent, this, readable,
+ NULL, &event);
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
- if (ret != 0) {
- continue;
- }
- pending = pending_raw;
+ /* First, check if we have an ESTALE from somewhere.
+ If so, propagate that so that a revalidate can be
+ issued.
+ */
+ op_errno = afr_final_errno (frame->local, this->private);
+ local->op_errno = op_errno;
+ if (op_errno == ESTALE) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ goto unwind;
+ }
- if (pending[AFR_METADATA_TRANSACTION]) {
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
- }
+ read_subvol = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (locked_entry && replies[i].op_ret == -1 &&
+ replies[i].op_errno == ENOENT) {
+ /* Second, check entry is still
+ "underway" in creation */
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ read_subvol = i;
+ goto unwind;
+ }
- if (pending[AFR_ENTRY_TRANSACTION]) {
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.",
- local->loc.path);
- local->self_heal.do_entry_self_heal = _gf_true;
- }
+ if (replies[i].op_ret == -1)
+ continue;
- if (pending[AFR_DATA_TRANSACTION]) {
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.",
- local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
- }
-}
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
+ local->op_ret = 0;
+ }
+ }
-void
-afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
-{
- int32_t *sources = NULL;
- afr_private_t *priv = NULL;
- int32_t subvol_status = 0;
- int32_t *success_children = NULL;
- dict_t **xattrs = NULL;
- struct iatt *bufs = NULL;
- int32_t **pending_matrix = NULL;
+ if (read_subvol == -1)
+ goto unwind;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ if (priv->child_up[i])
+ can_interpret = _gf_false;
+ continue;
+ }
- priv = this->private;
+ if (!uuid_compare (replies[i].poststat.ia_gfid,
+ read_gfid))
+ continue;
- sources = GF_CALLOC (priv->child_count, sizeof (*sources),
- gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
- success_children = local->cont.lookup.success_children;
- xattrs = local->cont.lookup.xattrs;
- bufs = local->cont.lookup.bufs;
- pending_matrix = local->cont.lookup.pending_matrix;
- afr_build_sources (this, xattrs, bufs, pending_matrix,
- sources, success_children, AFR_METADATA_TRANSACTION,
- &subvol_status, _gf_false);
- if (subvol_status & SPLIT_BRAIN)
- local->cont.lookup.possible_spb = _gf_true;
-out:
- GF_FREE (sources);
-}
+ can_interpret = _gf_false;
-static void
-afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
- struct iatt *buf, struct iatt *lookup_buf)
-{
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_DEBUG,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
- }
+ if (locked_entry)
+ continue;
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG,
- "ownership differs for %s ", local->loc.path);
- }
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "size differs for %s ", local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto unwind;
+ }
- if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
- /* mismatching gfid */
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: gfid different on subvolume", local->loc.path);
- }
-}
+ /* Fourth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ if (can_interpret) {
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ if (afr_replies_interpret (frame, this, local->inode)) {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ afr_inode_read_subvol_reset (local->inode, this);
+ goto cant_interpret;
+ } else {
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ 0, 0);
+ }
+ } else {
+ cant_interpret:
+ if (read_subvol == -1)
+ dict_del (replies[0].xdata, GF_CONTENT_KEY);
+ else
+ dict_del (replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
-static void
-afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
-{
- gf_boolean_t split_brain = _gf_false;
- afr_self_heal_t *sh = NULL;
+ afr_handle_quota_size (frame, this);
- sh = &local->self_heal;
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- split_brain = split_brain || local->cont.lookup.possible_spb;
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- sh->force_confirm_spb = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
-
- if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_entry_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entries are missing in lookup of %s.",
- local->loc.path);
- }
-
- return;
-}
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ENODATA > ESTALE > ENOENT > others
+ */
-gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+int
+afr_higher_errno (int32_t old_errno, int32_t new_errno)
{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
- if (sh->force_confirm_spb)
- return _gf_true;
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
+ return new_errno;
}
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type)
-{
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
- GF_ASSERT (ia_type != IA_INVAL);
+int
+afr_final_errno (afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno (op_errno, tmp_errno);
+ }
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
- return type;
+ return op_errno;
}
-int
-afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
- int32_t *read_child)
+/*
+ * Copy the host component of @pathinfo into @hostname.
+ *
+ * The host is taken to be the text strictly between the first and the
+ * last ':' of @pathinfo.
+ *
+ * @pathinfo: NUL-terminated pathinfo string; may be NULL.
+ * @hostname: destination buffer of @size bytes; zero-filled and then
+ *            written only on success.
+ * @size:     capacity of @hostname, including room for the NUL.
+ *
+ * Returns 0 on success. Returns -1 (leaving @hostname untouched) when
+ * @pathinfo is NULL, contains fewer than two ':' characters, or the host
+ * component does not fit in @hostname together with its terminator.
+ */
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
{
- ia_type_t ia_type = IA_INVAL;
- int32_t source = -1;
- int ret = -1;
- dict_t **xattrs = NULL;
- int32_t *success_children = NULL;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
- uuid_t *gfid = NULL;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (local->success_count > 0);
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ size_t len = 0;
- success_children = local->cont.lookup.success_children;
- /*We can take the success_children[0] only because we already
- *handle the conflicting children other wise, we could select the
- *read_child based on wrong file type
- */
- ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- type = afr_transaction_type_get (ia_type);
- xattrs = local->cont.lookup.xattrs;
- gfid = &local->cont.lookup.buf.ia_gfid;
- source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type, *gfid);
- if (source < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
- "for %s", local->loc.path);
+ if (!pathinfo)
+ goto out;
+
+ start = strchr (pathinfo, ':');
+ if (!start)
+ goto out;
+ end = strrchr (pathinfo, ':');
+ if (start == end)
goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
- source, local->loc.path);
- *read_child = source;
+ /* end > start here, so the host spans (start, end) exclusive */
+ len = (size_t)(end - start) - 1;
+ /* refuse hosts that would overflow the buffer or lose the NUL;
+ this also covers size == 0 */
+ if (len >= size)
+ goto out;
+ memset (hostname, 0, size);
+ memcpy (hostname, start + 1, len);
ret = 0;
out:
return ret;
}
-static inline gf_boolean_t
-afr_is_transaction_running (afr_local_t *local)
-{
- GF_ASSERT (local->fop == GF_FOP_LOOKUP);
- return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
-}
-
-void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed))
-{
- afr_local_t *local = NULL;
- char sh_type_str[256] = {0,};
- char *bg = "";
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (inode);
- GF_ASSERT (ia_type != IA_INVAL);
-
- local = frame->local;
- local->self_heal.background = background;
- local->self_heal.type = ia_type;
- local->self_heal.unwind = unwind;
- local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
-
- afr_self_heal_type_str_get (&local->self_heal,
- sh_type_str,
- sizeof (sh_type_str));
-
- if (background)
- bg = "background";
- gf_log (this->name, GF_LOG_DEBUG,
- "%s %s self-heal triggered. path: %s, reason: %s", bg,
- sh_type_str, local->loc.path, reason);
-
- afr_self_heal (frame, this, inode);
-}
-
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
- struct iatt *bufs, unsigned int child_count,
- const char *path)
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
{
- unsigned int gfid_miss_count = 0;
- int i = 0;
- struct iatt *child1 = NULL;
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+ xlator_t *this = THIS;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if (uuid_is_null (child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
- " on subvolume %d", path, success_children[i]);
- gfid_miss_count++;
- }
+ *local = _gf_false;
+ ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+ pathinfo);
+ goto out;
}
- return gfid_miss_count;
-}
-
-static int
-afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
-{
- int32_t *success_children = NULL;
- afr_private_t *priv = NULL;
- struct iatt *bufs = NULL;
- int miss_count = 0;
-
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
-
- miss_count = afr_gfid_missing_count (this->name, success_children,
- bufs, priv->child_count,
- local->loc.path);
- return miss_count;
-}
-
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name)
-{
- gf_boolean_t conflicting = _gf_false;
- int i = 0;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- uuid_t *gfid = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
- gfid = &child1->ia_gfid;
-
- if (i == 0)
- continue;
-
- child2 = &bufs[success_children[i-1]];
- if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- if (!gfid || uuid_is_null (child1->ia_gfid))
- continue;
- if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
- " on subvolume %d", path, success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
+ ret = gethostname (localhost, sizeof (localhost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+ "reason: %s", strerror (errno));
+ goto out;
}
-out:
- return conflicting;
-}
-/* afr_update_gfid_from_iatts: This function should be called only if the
- * iatts are not conflicting.
- */
-void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children, unsigned int child_count)
-{
- uuid_t *gfid = NULL;
- int i = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
- gfid = &bufs[child].ia_gfid;
- } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
- if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
- GF_ASSERT (0);
- goto out;
- }
- }
- }
- if (gfid && (!uuid_is_null (*gfid)))
- uuid_copy (uuid, *gfid);
+ if (!strcmp (localhost, pathinfohost))
+ *local = _gf_true;
out:
- return;
-}
-
-static gf_boolean_t
-afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- gf_boolean_t conflict = _gf_false;
-
- priv = this->private;
- conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name);
- return conflict;
-}
-
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal)
-{
- return !strcmp (data_self_heal, "open");
+ return ret;
}
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal)
+static int32_t
+afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- gf_boolean_t enabled = _gf_false;
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
- if (gf_string2boolean (data_self_heal, &enabled) == -1) {
- enabled = !strcmp (data_self_heal, "open");
- GF_ASSERT (enabled);
+ if (op_ret != 0) {
+ goto out;
}
- return enabled;
-}
-
-static void
-afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- struct iatt *bufs = NULL;
- dict_t **xattr = NULL;
- afr_private_t *priv = NULL;
- int32_t child1 = -1;
- int32_t child2 = -1;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- sh = &local->self_heal;
-
- afr_detect_self_heal_by_lookup_status (local, this);
-
- if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.do_gfid_self_heal = _gf_true;
-
- if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- else
- afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
- local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count);
-
- bufs = local->cont.lookup.bufs;
- for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i-1];
- child2 = local->cont.lookup.success_children[i];
- afr_detect_self_heal_by_iatt (local, this,
- &bufs[child1], &bufs[child2]);
- }
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
- xattr = local->cont.lookup.xattrs;
- for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_params_by_xattr (local, this,
- xattr[child1]);
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
}
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- sh->do_data_self_heal = _gf_false;
- if (sh->do_metadata_self_heal)
- afr_lookup_check_set_metadata_split_brain (local, this);
- afr_detect_self_heal_by_split_brain_status (local, this);
-}
-
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed)
-{
- afr_local_t *local = NULL;
- int ret = -1;
- dict_t *xattr = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = afr_most_important_error(local->op_errno,
- op_errno, _gf_true);
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
goto out;
- } else {
- local->op_ret = 0;
}
- afr_lookup_done_success_action (frame, this, _gf_true);
- xattr = local->cont.lookup.xattr;
- if (xattr) {
- ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "sh-failed to %d", local->loc.path, sh_failed);
-
- if (local->self_heal.actual_sh_started == _gf_true &&
- sh_failed == 0) {
- ret = dict_set_int32 (xattr, "actual-sh-done", 1);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
- " set actual-sh-done to %d",
- local->loc.path,
- local->self_heal.actual_sh_started);
- }
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
}
out:
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
-
+ STACK_DESTROY(frame->root);
return 0;
}
-//TODO: At the moment only lookup needs this, so not doing any checks, in the
-// future we will have to do fop specific operations
-void
-afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
{
- afr_local_t *local = NULL;
- afr_local_t *sh_local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- struct iatt *lookup_bufs = NULL;
- struct iatt *lookup_parentbufs = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- local = sh->orig_frame->local;
- lookup_bufs = local->cont.lookup.bufs;
- lookup_parentbufs = local->cont.lookup.postparents;
- priv = this->private;
-
- memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
- memcpy (lookup_parentbufs, sh->parentbufs,
- priv->child_count * sizeof (*sh->parentbufs));
-
- afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- local->cont.lookup.xattr = NULL;
- }
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
}
- afr_reset_children (local->cont.lookup.success_children,
- priv->child_count);
- afr_children_copy (local->cont.lookup.success_children,
- sh->fresh_children, priv->child_count);
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
}
-static void
-afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
-{
- unsigned int up_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- char *reason = NULL;
-
- GF_ASSERT (sh_launched);
- *sh_launched = _gf_false;
- priv = this->private;
- local = frame->local;
-
- up_count = afr_up_children_count (local->child_up, priv->child_count);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
- goto out;
- }
-
- afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local) &&
- /*Forcefully call afr_launch_self_heal (which will go on to
- fail) for SB files.This prevents stale data being served
- due to race in afr_is_transaction_running() when
- multiple clients access the same SB file*/
- !local->cont.lookup.possible_spb &&
- (!local->attempt_self_heal))
- goto out;
- reason = "lookup detected pending operations";
- afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- !local->foreground_self_heal,
- local->cont.lookup.buf.ia_type,
- reason, afr_post_gfid_sh_success,
- afr_self_heal_lookup_unwind);
- *sh_launched = _gf_true;
- }
-out:
- return;
-}
-
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count)
+int
+afr_lookup_selfheal_wrap (void *opaque)
{
- unsigned int i = 0;
- unsigned int j = 0;
-
- GF_ASSERT (success_children);
- GF_ASSERT (sources);
- GF_ASSERT (fresh_children);
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
- afr_reset_children (fresh_children, child_count);
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- success_children[i])) {
- fresh_children[j] = success_children[i];
- j++;
- }
- }
-}
+ local = frame->local;
+ this = frame->this;
-static int
-afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
-{
- afr_private_t *priv = NULL;
+ afr_selfheal_name (frame->this, local->loc.pargfid, local->loc.name);
- GF_ASSERT (read_child >= 0);
+ afr_replies_wipe (local, this->private);
- priv = this->private;
- afr_get_fresh_children (local->cont.lookup.success_children,
- local->cont.lookup.sources,
- local->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
- local->fresh_children);
+ inode = afr_selfheal_unlocked_lookup_on (frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up);
+ if (inode)
+ inode_unref (inode);
+ afr_lookup_done (frame, this);
- return 0;
+ return 0;
}
+
int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict)
+afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
{
- int32_t read_child = -1;
- int32_t ret = -1;
- afr_local_t *local = NULL;
- gf_boolean_t fresh_lookup = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t need_heal = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
- local = frame->local;
- fresh_lookup = local->cont.lookup.fresh_lookup;
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
- if (local->loc.parent == NULL)
- fail_conflict = _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (afr_lookup_conflicting_entries (local, this)) {
- if (fail_conflict == _gf_false)
- ret = 0;
- goto out;
- }
+ if (first == -1) {
+ first = i;
+ continue;
+ }
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (!afr_is_transaction_running (local) || fresh_lookup) {
- if (read_child < 0)
- goto out;
+ if (replies[i].op_ret != replies[first].op_ret) {
+ need_heal = _gf_true;
+ break;
+ }
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret)
- goto out;
- }
+ if (uuid_compare (replies[i].poststat.ia_gfid,
+ replies[first].poststat.ia_gfid)) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
- ret = afr_lookup_build_response_params (local, this);
- if (ret)
- goto out;
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
+ if (need_heal) {
+ heal = copy_frame (frame);
+ if (heal)
+ heal->root->pid = -1;
+ ret = synctask_new (this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto lookup_done;
+ } else {
+ lookup_done:
+ afr_lookup_done (frame, this);
+ }
- ret = 0;
-out:
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
- return ret;
+ return ret;
}
+
int
-afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
- int lsubvol = -1;
-
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if (uuid_is_null (bufs[child].ia_gfid))
- continue;
- if (lsubvol < 0) {
- lsubvol = child;
- } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
- lsubvol = child;
- } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
- (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
- lsubvol = child;
- }
- }
- return lsubvol;
-}
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
-void
-afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
- int subvol)
-{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
+ child_index = (long) cookie;
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- memcpy (local->fresh_children, success_children,
- sizeof (*success_children) * priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- child = local->fresh_children[i];
- if (child == -1)
- break;
- if (child == subvol)
- continue;
- if (uuid_is_null (bufs[child].ia_gfid) &&
- (bufs[child].ia_type == bufs[subvol].ia_type))
- continue;
- afr_children_rm_child (success_children, child,
- priv->child_count);
- local->success_count--;
- }
- afr_reset_children (local->fresh_children, priv->child_count);
-}
+ local = frame->local;
-void
-afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
-{
- int lsubvol = 0;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- if (!afr_lookup_conflicting_entries (local, this))
- goto out;
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_entry_heal (frame, this);
+ }
- lsubvol = afr_lookup_get_latest_subvol (local, this);
- if (lsubvol < 0)
- goto out;
- afr_lookup_mark_other_entries_stale (local, this, lsubvol);
-out:
- return;
+ return 0;
}
-gf_boolean_t
-afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
-{
- /*
- * We need to perform this test in lookup done and treat on going
- * create/DELETE as ENOENT.
- * Reason:
- Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
-
- 1 Client A is in the middle of mkdir(/a). It has acquired lock.
- It has performed mkdir(/a) on one subvol, and second one is still
- in progress
- 2 Client B performs a lookup, sees directory /a on one,
- ENOENT on the other, succeeds lookup.
- 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
- (one subvol because /a/b does not exist, another because /a
- itself does not exist)
- 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
- basename=b on one subvol, but fails on other subvol as /a is yet to
- be created by Client A.
- 5 Client A finishes mkdir of /a on other subvol
- 6 Client C also attempts to create /a/b, lookup returns ENOENT on
- both subvols.
- 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
- obtains on one subvol (where B had failed), and waits for B to unlock
- on other subvol.
- 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
- transaction and unlocks
- 9 Client C gets the lock on the second subvol, At this stage second
- subvol already has /a/b created from Client B, but Client C does not
- check that in the middle of mkdir transaction
- 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
- ONLY ONE (where Client B could not get lock because of
- missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
- This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
- Client B performed transaction on only one subvol (because entrylk()
- could not be obtained on second subvol because of missing parent dir --
- caused by premature/speculative succeeding of lookup() on /a when locks
- are detected). Other subvol gets GFID-2 from Client C because while
- it was waiting for entrylk() on both subvols, Client B was in the
- middle of creating mkdir() on only one subvol, and Client C does not
- "expect" this when it is between lock() and pre-op()/op() phase of the
- transaction.
- */
- if (local->cont.lookup.parent_entrylk && local->enoent_count)
- return _gf_true;
-
- return _gf_false;
-}
static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
+afr_discover_done (call_frame_t *frame, xlator_t *this)
{
- int unwind = 1;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t sh_launched = _gf_false;
- gf_boolean_t fail_conflict = _gf_false;
- int gfid_miss_count = 0;
- int enotconn_count = 0;
- int up_children_count = 0;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
priv = this->private;
local = frame->local;
- if (afr_is_entry_possibly_under_creation (local, this)) {
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto unwind;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
}
- if (local->op_ret < 0)
- goto unwind;
+ op_errno = afr_final_errno (frame->local, this->private);
- if (local->cont.lookup.parent_entrylk && local->success_count > 1)
- afr_succeed_lookup_on_latest_iatt (local, this);
-
- gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
- enotconn_count = priv->child_count - up_children_count;
- if ((gfid_miss_count == local->success_count) &&
- (enotconn_count > 0)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
- "LOOKUP on a file without gfid is not allowed when "
- "some of the children are down", local->loc.path);
- goto unwind;
- }
-
- if ((gfid_miss_count == local->success_count) &&
- uuid_is_null (local->cont.lookup.gfid_req)) {
- local->op_ret = -1;
- local->op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
- local->loc.path);
+ if (local->op_ret < 0) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
goto unwind;
- }
+ }
- if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
- fail_conflict = _gf_true;
- ret = afr_lookup_done_success_action (frame, this, fail_conflict);
- if (ret)
- goto unwind;
- uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+ afr_replies_interpret (frame, this, local->inode);
- afr_lookup_perform_self_heal (frame, this, &sh_launched);
- if (sh_launched) {
- unwind = 0;
- goto unwind;
- }
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+ if (read_subvol == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
+ local->loc.path);
- unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid ||
+ local->replies[i].op_ret == -1)
+ continue;
+ read_subvol = i;
+ break;
+ }
+ }
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > EIO > ENOENT > others
- */
-int32_t
-afr_most_important_error(int32_t old_errno, int32_t new_errno,
- gf_boolean_t eio)
-{
- if (old_errno == ESTALE || new_errno == ESTALE)
- return ESTALE;
- if (eio && (old_errno == EIO || new_errno == EIO))
- return EIO;
- if (old_errno == ENOENT || new_errno == ENOENT)
- return ENOENT;
+unwind:
+ if (read_subvol == -1)
+ read_subvol = 0;
- return new_errno;
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
}
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count)
-{
- int i = 0;
- int32_t op_errno = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- op_errno = afr_most_important_error(op_errno,
- child_errno[child],
- _gf_false);
- }
- return op_errno;
-}
-static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+int
+afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- GF_ASSERT (local);
- if (op_errno == ENOENT)
- local->enoent_count++;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local->op_errno = afr_most_important_error(local->op_errno, op_errno,
- _gf_false);
+ child_index = (long) cookie;
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-}
+ local = frame->local;
-static void
-afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
- inode_t *inode)
-{
- afr_private_t *priv = NULL;
- GF_ASSERT (inode);
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ }
- if (!__is_root_gfid (inode->gfid))
- goto out;
- if (!afr_is_fresh_lookup (&local->loc, this))
- goto out;
- priv = this->private;
- if ((priv->first_lookup)) {
- gf_log (this->name, GF_LOG_INFO, "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery (this, child_index);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_discover_done (frame, this);
}
-out:
- return;
-}
-static void
-afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
- struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (child_index >= 0);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparents[child_index] = *postparent;
- local->cont.lookup.bufs[child_index] = *buf;
+ return 0;
}
-static void
-afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
- inode_t *inode, struct iatt *buf)
-{
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.buf = *buf;
- afr_set_root_inode_on_first_lookup (local, this, inode);
-}
-static int32_t
-afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+int
+afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
{
- int ret = 0;
- char *pathinfo = NULL;
- gf_boolean_t is_local = _gf_false;
- afr_private_t *priv = NULL;
- int32_t child_index = -1;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- if (op_ret != 0) {
- goto out;
- }
+ local = frame->local;
+ priv = this->private;
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret != 0) {
- goto out;
- }
+ if (err) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
- ret = afr_local_pathinfo (pathinfo, &is_local);
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
}
- priv = this->private;
- /*
- * Note that one local subvolume will override another here. The only
- * way to avoid that would be to retain extra information about whether
- * the previous read_child is local, and it's just not worth it. Even
- * the slowest local subvolume is far preferable to a remote one.
- */
- if (is_local) {
- child_index = (int32_t)(long)cookie;
- gf_log (this->name, GF_LOG_INFO,
- "selecting local read_child %s",
- priv->children[child_index]->name);
- priv->read_child = child_index;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_discover_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
}
+ return 0;
out:
- STACK_DESTROY(frame->root);
- return 0;
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
-static void
-afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+
+int
+afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- call_frame_t *newframe = NULL;
- loc_t tmploc = {0,};
- afr_private_t *priv = this->private;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- newframe = create_frame(this,this->ctx->pool);
- if (!newframe) {
- return;
- }
+ priv = this->private;
- tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
- STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
- (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->getxattr,
- &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
-}
-
-static void
-afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_private_t *priv = this->private;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
- local->op_ret = op_ret;
- local->op_errno = 0;
- }
- afr_lookup_handle_first_success (local, this, inode, buf);
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
}
- afr_lookup_update_lk_counts (local, this,
- child_index, xattr);
- afr_lookup_cache_args (local, child_index, xattr,
- buf, postparent);
+ if (__is_root_gfid (loc->inode->gfid)) {
+ if (!this->itable)
+ this->itable = loc->inode->table;
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref (loc->inode);
- if (local->do_discovery && (priv->read_child == (-1))) {
- afr_attempt_local_discovery(this,child_index);
- }
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
- local->cont.lookup.success_children[local->success_count] = child_index;
- local->success_count++;
-}
+ local->op = GF_FOP_LOOKUP;
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ loc_copy (&local->loc, loc);
- child_index = (long) cookie;
+ local->inode = inode_ref (loc->inode);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
- goto unlock;
- }
- afr_lookup_handle_success (local, this, child_index, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
+ if (uuid_is_null (loc->inode->gfid)) {
+ afr_discover_do (frame, this, 0);
+ return 0;
+ }
- }
-unlock:
- UNLOCK (&frame->lock);
+ afr_read_subvol_get (loc->inode, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_done (frame, this);
- }
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
+ else
+ afr_discover_do (frame, this, 0);
- return 0;
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
+
int
-afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
{
- int ret = -ENOMEM;
- struct iatt *iatts = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
- int32_t **pending_matrix = NULL;
-
- GF_ASSERT (local);
- local->cont.lookup.xattrs = GF_CALLOC (child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
- if (NULL == local->cont.lookup.xattrs)
- goto out;
-
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.postparents = iatts;
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.bufs = iatts;
+ local = frame->local;
+ priv = this->private;
- success_children = afr_children_create (child_count);
- if (NULL == success_children)
- goto out;
- local->cont.lookup.success_children = success_children;
+ if (err < 0) {
+ local->op_errno = -err;
+ ret = -1;
+ goto out;
+ }
- local->fresh_children = afr_children_create (child_count);
- if (NULL == local->fresh_children)
- goto out;
+ call_count = local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
- sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
- local->cont.lookup.sources = sources;
-
- pending_matrix = afr_matrix_create (child_count, child_count);
- if (NULL == pending_matrix)
+ ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ ret = -1;
goto out;
- local->cont.lookup.pending_matrix = pending_matrix;
+ }
- ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
out:
- return ret;
+ AFR_STACK_UNWIND (lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
}
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element getting looked up is,
+ * i.e. what its GFID, its inode type and a conservative estimate of its
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or be in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief outline of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply with the same inode type and GFID, the overall
+ * call MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
+
int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
- priv = this->private;
-
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
- local->op_ret = -1;
+ if (!loc->parent) {
+ afr_discover (frame, this, loc, xattr_req);
+ return 0;
+ }
- frame->local = local;
- local->fop = GF_FOP_LOOKUP;
+ if (__is_root_gfid (loc->parent->gfid)) {
+ if (!strcmp (loc->name, GF_REPLICATE_TRASH_DIR)) {
+ op_errno = EPERM;
+ goto out;
+ }
+ }
- loc_copy (&local->loc, loc);
- ret = loc_path (&local->loc, NULL);
- if (ret < 0) {
- op_errno = EINVAL;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- if (local->loc.path &&
- (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
- op_errno = EPERM;
- ret = -1;
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
goto out;
}
- ret = inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ local->op = GF_FOP_LOOKUP;
- local->read_child_index = afr_inode_get_read_ctx (this,
- local->loc.inode,
- NULL);
- } else {
- LOCK (&priv->read_child_lock);
- {
- if (priv->hash_mode) {
- local->read_child_index = -1;
- }
- else {
- local->read_child_index =
- (++priv->read_child_rr) %
- (priv->child_count);
- }
- }
- UNLOCK (&priv->read_child_lock);
- local->cont.lookup.fresh_lookup = _gf_true;
- }
+ loc_copy (&local->loc, loc);
- local->child_up = memdup (priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- if (NULL == local->child_up) {
- op_errno = ENOMEM;
- goto out;
- }
+ local->inode = inode_ref (loc->inode);
- ret = afr_lookup_cont_init (local, priv->child_count);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (xattr_req)
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_ref (xattr_req);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- call_count = local->call_count;
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
+ afr_read_subvol_get (loc->parent, this, NULL, &event,
+ AFR_DATA_TRANSACTION);
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
+ if (event != local->event_generation)
+ afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
+ else
+ afr_lookup_do (frame, this, 0);
- ret = dict_get_int32 (xattr_req, "attempt-self-heal",
- &local->attempt_self_heal);
- dict_del (xattr_req, "attempt-self-heal");
+ return 0;
+out:
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ret = dict_get_int32 (xattr_req, "foreground-self-heal",
- &local->foreground_self_heal);
- dict_del (xattr_req, "foreground-self-heal");
+ return 0;
+}
- ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
- &gfid_req);
- if (ret) {
- local->op_errno = -ret;
- goto out;
- }
- afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
- &local->loc);
- local->fop = GF_FOP_LOOKUP;
- if (priv->choose_local && !priv->did_discovery) {
- if (gfid_req && __is_root_gfid(gfid_req)) {
- local->do_discovery = _gf_true;
- priv->did_discovery = _gf_true;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+
+/* {{{ open */
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
}
- ret = 0;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
out:
- if (ret)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
-
- return 0;
+ return fd_ctx;
}
-/* {{{ open */
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
+
int
__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
@@ -2559,6 +1911,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
int ret = -1;
uint64_t ctx = 0;
afr_fd_ctx_t * fd_ctx = NULL;
+ int i = 0;
VALIDATE_OR_GOTO (this->private, out);
VALIDATE_OR_GOTO (fd, out);
@@ -2577,21 +1930,15 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto out;
- }
-
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto out;
- }
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ fd_ctx->pre_op_done[i] = GF_CALLOC (sizeof (*fd_ctx->pre_op_done[i]),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
priv->child_count,
@@ -2601,6 +1948,13 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous (fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
+
fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
priv->child_count,
gf_afr_mt_char);
@@ -2617,20 +1971,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
-
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto out;
- }
-
pthread_mutex_init (&fd_ctx->delay_lock, NULL);
- INIT_LIST_HEAD (&fd_ctx->entries);
- fd_ctx->call_child = -1;
INIT_LIST_HEAD (&fd_ctx->eager_locked);
@@ -2660,32 +2001,31 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
/* {{{ flush */
int
-afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ int call_count = -1;
local = frame->local;
LOCK (&frame->lock);
{
if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0)
- AFR_STACK_UNWIND(flush, frame, local->op_ret,
- local->op_errno, NULL);
+ AFR_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, local->xdata_rsp);
return 0;
}
@@ -2708,7 +2048,7 @@ afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->flush,
- local->fd, NULL);
+ local->fd, xdata);
if (!--call_count)
break;
@@ -2721,40 +2061,30 @@ afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
int
afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
call_stub_t *stub = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
- priv = this->private;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_local_init(local, priv, &op_errno);
- if (ret < 0)
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
goto out;
+ }
local->fd = fd_ref(fd);
+
stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
- if (!stub) {
- ret = -1;
- op_errno = ENOMEM;
+ if (!stub)
goto out;
- }
afr_delayed_changelog_wake_resume (this, fd, stub);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
-
+ AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
@@ -2767,6 +2097,7 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
uint64_t ctx = 0;
afr_fd_ctx_t *fd_ctx = NULL;
int ret = 0;
+ int i = 0;
ret = fd_ctx_get (fd, this, &ctx);
if (ret < 0)
@@ -2775,13 +2106,11 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
if (fd_ctx) {
- GF_FREE (fd_ctx->pre_op_done);
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++)
+ GF_FREE (fd_ctx->pre_op_done[i]);
GF_FREE (fd_ctx->opened_on);
- GF_FREE (fd_ctx->locked_on);
-
- GF_FREE (fd_ctx->pre_op_piggyback);
GF_FREE (fd_ctx->lock_piggyback);
GF_FREE (fd_ctx->lock_acquired);
@@ -2799,24 +2128,8 @@ out:
int
afr_release (xlator_t *this, fd_t *fd)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
afr_cleanup_fd_ctx (this, fd);
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
-
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
-
- }
-
return 0;
}
@@ -2841,36 +2154,38 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_local_t *local = NULL;
int call_count = -1;
int child_index = (long) cookie;
- int read_child = 0;
+ int read_subvol = 0;
call_stub_t *stub = NULL;
local = frame->local;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
LOCK (&frame->lock);
{
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
if (op_ret == 0) {
- local->op_ret = 0;
+ if (local->op_ret == -1) {
+ local->op_ret = 0;
- if (local->success_count == 0) {
local->cont.inode_wfop.prebuf = *prebuf;
local->cont.inode_wfop.postbuf = *postbuf;
+
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
- if (child_index == read_child) {
+ if (child_index == read_subvol) {
local->cont.inode_wfop.prebuf = *prebuf;
local->cont.inode_wfop.postbuf = *postbuf;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
}
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
@@ -2890,7 +2205,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf,
- xdata);
+ local->xdata_rsp);
if (!stub) {
AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
return 0;
@@ -2910,37 +2225,35 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ int32_t op_errno = ENOMEM;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ priv = this->private;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- local->fd = fd_ref (fd);
+ local->fd = fd_ref (fd);
if (afr_fd_has_witnessed_unstable_write (this, fd)) {
/* don't care. we only wanted to CLEAR the bit */
}
+ local->inode = inode_ref (fd->inode);
+
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_fsync_cbk,
@@ -2953,10 +2266,10 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
@@ -2964,10 +2277,9 @@ out:
/* {{{ fsync */
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+int
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = -1;
@@ -2976,10 +2288,13 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
LOCK (&frame->lock);
{
- if (op_ret == 0)
+ if (op_ret == 0) {
local->op_ret = 0;
-
- local->op_errno = op_errno;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
@@ -2987,37 +2302,33 @@ afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ local->op_errno, local->xdata_rsp);
return 0;
}
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+int
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3030,10 +2341,10 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+
return 0;
}
@@ -3056,6 +2367,10 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (op_ret == 0) {
if (!local->cont.xattrop.xattr)
local->cont.xattrop.xattr = dict_ref (xattr);
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+
local->op_ret = 0;
}
@@ -3067,7 +2382,7 @@ afr_xattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- local->cont.xattrop.xattr, xdata);
+ local->cont.xattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -3079,25 +2394,21 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3110,10 +2421,10 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -3138,6 +2449,8 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (!local->cont.fxattrop.xattr)
local->cont.fxattrop.xattr = dict_ref (xattr);
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
local->op_ret = 0;
}
@@ -3149,7 +2462,7 @@ afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
if (call_count == 0)
AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- local->cont.fxattrop.xattr, xdata);
+ local->cont.fxattrop.xattr, local->xdata_rsp);
return 0;
}
@@ -3161,25 +2474,21 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
int32_t op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3192,10 +2501,10 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -3203,8 +2512,8 @@ out:
int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3238,25 +2547,21 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOMEM;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3270,18 +2575,17 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
return 0;
}
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3309,31 +2613,26 @@ afr_finodelk_cbk (call_frame_t *frame, void *cookie,
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3347,10 +2646,10 @@ afr_finodelk (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
return 0;
}
@@ -3383,33 +2682,28 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+int
+afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
int32_t op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3423,18 +2717,18 @@ afr_entrylk (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
@@ -3461,33 +2755,28 @@ afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
}
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+int
+afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int ret = -1;
int i = 0;
int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3501,82 +2790,85 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+
return 0;
}
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs, dict_t *xdata)
+
+int
+afr_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
afr_local_t *local = NULL;
int call_count = 0;
+ struct statvfs *buf = NULL;
LOCK (&frame->lock);
{
local = frame->local;
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
+ if (op_ret != 0) {
local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = op_ret;
+
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf, xdata);
+ &local->cont.statfs.buf, local->xdata_rsp);
return 0;
}
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+int
+afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
int i = 0;
- int ret = -1;
int call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
+ int32_t op_errno = ENOMEM;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ priv = this->private;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND (frame, afr_statfs_cbk,
priv->children[i],
@@ -3587,10 +2879,10 @@ afr_statfs (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -3699,21 +2991,6 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
&local->cont.lk.ret_flock, NULL);
} else {
- /* locking has succeeded on all nodes that are up */
-
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
-
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
-
- */
AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
&local->cont.lk.ret_flock, NULL);
}
@@ -3729,20 +3006,12 @@ afr_lk (call_frame_t *frame, xlator_t *this,
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int i = 0;
- int32_t op_errno = 0;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
priv = this->private;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
goto out;
local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
@@ -3764,28 +3033,16 @@ afr_lk (call_frame_t *frame, xlator_t *this,
priv->children[i]->fops->lk,
fd, cmd, flock, xdata);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
int
afr_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- inode_ctx_get (inode, this, &ctx_addr);
-
- if (!ctx_addr)
- goto out;
-
- ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-out:
return 0;
}
@@ -3805,7 +3062,6 @@ afr_priv_dump (xlator_t *this)
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
gf_proc_dump_add_section(key_prefix);
gf_proc_dump_write("child_count", "%u", priv->child_count);
- gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
for (i = 0; i < priv->child_count; i++) {
sprintf (key, "child_up[%d]", i);
gf_proc_dump_write(key, "%d", priv->child_up[i]);
@@ -3862,7 +3118,7 @@ afr_notify (xlator_t *this, int32_t event,
int idx = -1;
int ret = -1;
int call_psh = 0;
- int up_child = AFR_ALL_CHILDREN;
+ int up_child = -1;
dict_t *input = NULL;
dict_t *output = NULL;
@@ -3914,6 +3170,7 @@ afr_notify (xlator_t *this, int32_t event,
*/
if (priv->child_up[idx] != 1) {
priv->up_count++;
+ priv->event_generation++;
}
priv->child_up[idx] = 1;
@@ -3953,6 +3210,7 @@ afr_notify (xlator_t *this, int32_t event,
*/
if (priv->child_up[idx] == 1) {
priv->down_count++;
+ priv->event_generation++;
}
priv->child_up[idx] = 0;
@@ -4019,8 +3277,7 @@ afr_notify (xlator_t *this, int32_t event,
LOCK (&priv->lock);
{
- up_children = afr_up_children_count (priv->child_up,
- priv->child_count);
+ up_children = AFR_COUNT (priv->child_up, priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
@@ -4040,39 +3297,23 @@ afr_notify (xlator_t *this, int32_t event,
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
- if (call_psh && priv->shd.iamshd)
- afr_proactive_self_heal ((void*) (long) up_child);
+ if (call_psh && priv->shd.iamshd) {
+ afr_selfheal_childup (this, up_child);
+ }
out:
return ret;
}
-int
-afr_first_up_child (unsigned char *child_up, size_t child_count)
-{
- int ret = -1;
- int i = 0;
-
- GF_ASSERT (child_up);
-
- for (i = 0; i < child_count; i++) {
- if (child_up[i]) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
int
afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
{
- int ret = -1;
-
local->op_ret = -1;
local->op_errno = EUCLEAN;
+ syncbarrier_init (&local->barrier);
+
local->child_up = GF_CALLOC (priv->child_count,
sizeof (*local->child_up),
gf_afr_mt_char);
@@ -4084,38 +3325,42 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
memcpy (local->child_up, priv->child_up,
sizeof (*local->child_up) * priv->child_count);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ local->call_count = AFR_COUNT (local->child_up, priv->child_count);
if (local->call_count == 0) {
gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
if (op_errno)
*op_errno = ENOTCONN;
goto out;
}
+ local->event_generation = priv->event_generation;
- local->child_errno = GF_CALLOC (priv->child_count,
- sizeof (*local->child_errno),
- gf_afr_mt_int32_t);
- if (!local->child_errno) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+ local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_afr_mt_int32_t);
- if (!local->transaction.postop_piggybacked) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+ local->readable = GF_CALLOC (priv->child_count, sizeof (char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- local->append_write = _gf_false;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- return ret;
+ return -1;
}
int
@@ -4218,13 +3463,11 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
}
ret = -ENOMEM;
- child_up_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ child_up_count = AFR_COUNT (local->child_up, priv->child_count);
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
- local->first_up_child = afr_first_up_child (local->child_up,
- priv->child_count);
+ local->pre_op_compat = priv->pre_op_compat;
local->transaction.eager_lock =
GF_CALLOC (sizeof (*local->transaction.eager_lock),
@@ -4234,26 +3477,29 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.eager_lock)
goto out;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
priv->child_count,
gf_afr_mt_char);
if (!local->transaction.pre_op)
goto out;
+ local->transaction.fop_subvols = GF_CALLOC (sizeof (*local->transaction.fop_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.fop_subvols)
+ goto out;
+
+ local->transaction.failed_subvols = GF_CALLOC (sizeof (*local->transaction.failed_subvols),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
local->pending = afr_matrix_create (priv->child_count,
AFR_NUM_CHANGE_LOGS);
if (!local->pending)
goto out;
- local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!local->transaction.txn_changelog)
- goto out;
-
INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
@@ -4261,86 +3507,6 @@ out:
return ret;
}
-void
-afr_reset_children (int32_t *fresh_children, int32_t child_count)
-{
- unsigned int i = 0;
- for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
-}
-
-int32_t*
-afr_children_create (int32_t child_count)
-{
- int32_t *children = NULL;
- int i = 0;
-
- GF_ASSERT (child_count > 0);
-
- children = GF_CALLOC (child_count, sizeof (*children),
- gf_afr_mt_int32_t);
- if (NULL == children)
- goto out;
- for (i = 0; i < child_count; i++)
- children[i] = -1;
-out:
- return children;
-}
-
-void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count)
-{
- gf_boolean_t child_found = _gf_false;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- child_found = _gf_true;
- break;
- }
- }
-
- if (!child_found) {
- GF_ASSERT (i < child_count);
- children[i] = child;
- }
-}
-
-void
-afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
-{
- int i = 0;
-
- GF_ASSERT ((child >= 0) && (child < child_count));
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- if (i != (child_count - 1))
- memmove (children + i, children + i + 1,
- sizeof (*children)*(child_count - i - 1));
- children[child_count - 1] = -1;
- break;
- }
- }
-}
-
-int
-afr_get_children_count (int32_t *children, unsigned int child_count)
-{
- int count = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- count++;
- }
- return count;
-}
void
afr_set_low_priority (call_frame_t *frame)
@@ -4348,38 +3514,6 @@ afr_set_low_priority (call_frame_t *frame)
frame->root->pid = LOW_PRIO_PROC_PID;
}
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags)
-{
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- GF_ASSERT (fd && fd->inode);
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
- goto out;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child] = AFR_FD_OPENED;
- if (!IA_ISDIR (fd->inode->ia_type)) {
- fd_ctx->flags = flags;
- }
- ret = 0;
-out:
- return ret;
-}
gf_boolean_t
afr_have_quorum (char *logname, afr_private_t *priv)
@@ -4426,33 +3560,6 @@ afr_priv_destroy (afr_private_t *priv)
if (!priv)
goto out;
inode_unref (priv->root_inode);
- GF_FREE (priv->shd.pos);
- GF_FREE (priv->shd.pending);
- GF_FREE (priv->shd.inprogress);
-// for (i = 0; i < priv->child_count; i++)
-// if (priv->shd.timer && priv->shd.timer[i])
-// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
- GF_FREE (priv->shd.timer);
-
- if (priv->shd.healed)
- eh_destroy (priv->shd.healed);
-
- if (priv->shd.heal_failed)
- eh_destroy (priv->shd.heal_failed);
-
- if (priv->shd.split_brain)
- eh_destroy (priv->shd.split_brain);
-
- for (i = 0; i < priv->child_count; i++)
- {
- if (priv->shd.statistics[i])
- eh_destroy (priv->shd.statistics[i]);
- }
-
- GF_FREE (priv->shd.statistics);
-
- GF_FREE (priv->shd.crawl_events);
-
GF_FREE (priv->last_event);
if (priv->pending_key) {
for (i = 0; i < priv->child_count; i++)
@@ -4462,8 +3569,7 @@ afr_priv_destroy (afr_private_t *priv)
GF_FREE (priv->children);
GF_FREE (priv->child_up);
LOCK_DESTROY (&priv->lock);
- LOCK_DESTROY (&priv->read_child_lock);
- pthread_mutex_destroy (&priv->mutex);
+
GF_FREE (priv);
out:
return;
@@ -4480,124 +3586,21 @@ xlator_subvolume_count (xlator_t *this)
return i;
}
-inline gf_boolean_t
-afr_is_errno_set (int *child_errno, int child)
-{
- return child_errno[child];
-}
-
-inline gf_boolean_t
-afr_is_errno_unset (int *child_errno, int child)
-{
- return !afr_is_errno_set (child_errno, child);
-}
-
-void
-afr_prepare_new_entry_pending_matrix (int32_t **pending,
- gf_boolean_t (*is_pending) (int *, int),
- int *ctx, struct iatt *buf,
- unsigned int child_count)
-{
- int midx = 0;
- int idx = 0;
- int i = 0;
-
- midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- if (IA_ISDIR (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else if (IA_ISREG (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- else
- idx = -1;
- for (i = 0; i < child_count; i++) {
- if (is_pending (ctx, i)) {
- pending[i][midx] = hton32 (1);
- if (idx == -1)
- continue;
- pending[i][idx] = hton32 (1);
- }
- }
-}
-
-gf_boolean_t
-afr_is_fd_fixable (fd_t *fd)
-{
- if (!fd || !fd->inode)
- return _gf_false;
- else if (fd_is_anonymous (fd))
- return _gf_false;
- else if (uuid_is_null (fd->inode->gfid))
- return _gf_false;
-
- return _gf_true;
-}
void
afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- inode_t *inode = NULL;
- afr_inode_ctx_t *ctx = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = frame->local;
- if (local->fd)
- inode = local->fd->inode;
- else
- inode = local->loc.inode;
-
- if (!inode)
- return;
-
- LOCK (&inode->lock);
- {
- ctx = __afr_inode_ctx_get (inode, this);
- ctx->open_fd_count = local->open_fd_count;
- }
- UNLOCK (&inode->lock);
-}
-
-int
-afr_initialise_statistics (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int child_count = 0;
- eh_t *stats_per_brick = NULL;
- shd_crawl_event_t ***shd_crawl_events = NULL;
- priv = this->private;
-
- priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
- gf_common_mt_eh_t);
- if (!priv->shd.statistics) {
- ret = -1;
- goto out;
- }
- child_count = priv->child_count;
- for (i=0; i < child_count ; i++) {
- stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
- _gf_false,
- _destroy_crawl_event_data);
- if (!stats_per_brick) {
- ret = -1;
- goto out;
- }
- priv->shd.statistics[i] = stats_per_brick;
-
- }
-
- shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
- *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
- priv->child_count,
- gf_afr_mt_shd_crawl_event_t);
+ if (!local->fd)
+ return;
- if (!priv->shd.crawl_events) {
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- return ret;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx)
+ return;
+ fd_ctx->open_fd_count = local->open_fd_count;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 689dd84..fa1da39 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -37,177 +37,7 @@
#include "checksum.h"
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno, int32_t sh_failed)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
- gf_boolean_t activate_check = _gf_false;
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
- if (_gf_false == activate_check) {
- cksum = checksum[i];
- activate_check = _gf_true;
- continue;
- }
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
-
-int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- char *reason = NULL;
- int child_index = 0;
- uint32_t entry_cksum = 0;
- int call_count = 0;
- off_t last_offset = 0;
- inode_t *inode = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do opendir on %s",
- local->loc.path, priv->children[child_index]->name);
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: no entries found in %s",
- local->loc.path, priv->children[child_index]->name);
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- }
-
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset, NULL);
-
- return 0;
-
-out:
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->do_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
-
- reason = "checksums of directory differ";
- afr_launch_self_heal (frame, this, inode, _gf_false,
- inode->ia_type, reason, NULL,
- afr_examine_dir_sh_unwind);
- } else {
- afr_set_opendir_done (this, inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+#include "afr-transaction.h"
int32_t
@@ -215,112 +45,66 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
int call_count = -1;
int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
child_index = (long) cookie;
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
LOCK (&frame->lock);
{
- if (op_ret >= 0) {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
-
- local->op_errno = op_errno;
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (local->op_ret != 0)
- goto out;
-
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1 && priv->entry_self_heal) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anomalies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- /* do the unwind */
- goto out;
- }
- }
-
- return 0;
-
-out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
-
+ if (call_count == 0)
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
return 0;
}
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+int
+afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- int child_count = 0;
int i = 0;
- int ret = -1;
int call_count = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
priv = this->private;
- child_count = priv->child_count;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
call_count = local->call_count;
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_opendir_cbk,
(void*) (long) i,
@@ -333,182 +117,280 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
-
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
-struct entry_name {
- char *name;
- struct list_head list;
-};
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
-static void
-afr_forget_entries (fd_t *fd)
+static uint64_t
+afr_bits_for (uint64_t num)
{
- struct entry_name *entry = NULL;
- struct entry_name *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
+ uint64_t bits = 0, ctrl = 1;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
+ }
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ return bits;
}
-static void
-afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
+int
+afr_itransform (xlator_t *this, int subvol, uint64_t x, uint64_t *y_p)
{
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
+ }
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (__is_root_gfid (fd->inode->gfid) &&
- !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ max = conf->child_count;
+ cnt = subvol;
+
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = afr_bits_for (max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
}
+
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
}
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+
+int
+afr_deitransform (xlator_t *this, uint64_t y, int *subvol_p,
+ uint64_t *x_p)
{
- afr_local_t *local = NULL;
+ afr_private_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ int subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
+ max = conf->child_count;
+
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = afr_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
- if (op_ret == -1)
- goto out;
+out:
+ subvol = cnt;
- local = frame->local;
- afr_readdir_filter_trash_dir (entries, local->fd);
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ if (x_p)
+ *x_p = x;
-out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
return 0;
}
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+static void
+afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
+ gf_dirent_t *entries, fd_t *fd)
{
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int gen = 0;
- if (op_ret == -1)
- goto out;
+ priv = THIS->private;
- local = frame->local;
- afr_readdir_filter_trash_dir (entries, local->fd);
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
-out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
- return 0;
+ list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ continue;
+ }
+
+ list_del_init (&entry->list);
+ afr_itransform (THIS, subvol, entry->d_off, &entry->d_off);
+ list_add_tail (&entry->list, &entries->list);
+
+ if (entry->inode) {
+ gen = 0;
+ afr_inode_read_subvol_get (entry->inode, THIS,
+ data_readable,
+ metadata_readable, &gen);
+
+ if (gen != priv->event_generation ||
+ !data_readable[subvol] ||
+ !metadata_readable[subvol]) {
+
+ inode_unref (entry->inode);
+ entry->inode = NULL;
+ }
+ }
+ }
}
+
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- priv = this->private;
- children = priv->children;
+ INIT_LIST_HEAD (&entries.list);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
local = frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (op_ret >= 0)
+ afr_readdir_transform_entries (subvol_entries, (long) cookie,
+ &entries, local->fd);
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx) {
- op_errno = EBADF;
- goto out;
- }
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, xdata);
- if ((offset == 0) || (fd_ctx->call_child == -1)) {
- fd_ctx->call_child = call_child;
- } else if ((priv->readdir_failover == _gf_false) &&
- (call_child != fd_ctx->call_child)) {
- op_errno = EBADF;
- goto out;
- }
+ return 0;
+}
+
+
+int
+afr_readdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, 0, 0);
+ return 0;
+ }
- if (whichop == GF_FOP_READDIR)
+ if (local->op == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset, dict);
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset, dict);
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp,
+ local->fd, local->cont.readdir.size,
+ local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
+}
+
+
+int
+afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = whichop;
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict)? dict_ref (dict) : NULL;
+
+ if (offset == 0) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn (frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_deitransform (this, offset, &subvol,
+ (uint64_t *)&local->cont.readdir.offset);
+ afr_readdir_wind (frame, this, subvol);
+ }
return 0;
out:
@@ -521,7 +403,8 @@ int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+
return 0;
}
@@ -531,6 +414,7 @@ afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, dict_t *dict)
{
afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+
return 0;
}
@@ -538,7 +422,6 @@ afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int32_t
afr_releasedir (xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
afr_cleanup_fd_ctx (this, fd);
return 0;
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 1943b71..465dde5 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -34,10 +34,14 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "byte-order.h"
#include "afr.h"
#include "afr-transaction.h"
+void
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this);
+
int
afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
{
@@ -56,79 +60,214 @@ afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
*op_errno = ENOMEM;
goto out;
}
- parent->path = gf_strdup( dirname (child_path) );
- if (!parent->path) {
+
+ parent->path = gf_strdup (dirname (child_path));
+ if (!parent->path) {
if (op_errno)
*op_errno = ENOMEM;
goto out;
}
- parent->inode = inode_ref (child->parent);
- uuid_copy (parent->gfid, child->pargfid);
+
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
ret = 0;
out:
- GF_FREE(child_path);
+ GF_FREE (child_path);
return ret;
}
-void
-__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, struct iatt *prenewparent,
- struct iatt *postnewparent)
+
+static void
+__afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ afr_replies_interpret (frame, this, local->inode);
+ inode_read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get (local->parent, this,
+ NULL, NULL);
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
+ NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_read_subvol_reset (local->inode,
+ this);
+ if (local->parent)
+ afr_inode_read_subvol_reset (local->parent,
+ this);
+ if (local->parent2)
+ afr_inode_read_subvol_reset (local->parent2,
+ this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ if (local->replies[i].xdata)
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent =
+ local->replies[i].preparent;
+ local->cont.dir_fop.postparent =
+ local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent =
+ local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent =
+ local->replies[i].postparent2;
+ }
+ }
+}
+
+
+static void
+__afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
+}
+
+
+static int
+__afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ __afr_dir_write_fill (frame, this, child_index, op_ret,
+ op_errno, buf, preparent, postparent,
+ preparent2, postparent2, xdata);
+ }
+ UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_dir_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret > -1) {
- local->op_ret = op_ret;
-
- if ((local->success_count == 0) ||
- (child_index == local->read_child_index)) {
- local->cont.dir_fop.preparent = *preparent;
- local->cont.dir_fop.postparent = *postparent;
- if (buf)
- local->cont.dir_fop.buf = *buf;
- if (prenewparent)
- local->cont.dir_fop.prenewparent = *prenewparent;
- if (postnewparent)
- local->cont.dir_fop.postnewparent = *postnewparent;
- }
-
- local->cont.dir_fop.inode = inode;
-
- local->fresh_children[local->success_count] = child_index;
- local->success_count++;
- local->child_errno[child_index] = 0;
- } else {
- local->child_errno[child_index] = op_errno;
+ afr_mark_entry_pending_changelog (frame, this);
+
+ local->transaction.resume (frame, this);
}
- local->op_errno = op_errno;
+ return 0;
}
+
int
afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+ xlator_t *this, int op_ret, int op_errno,
dict_t *xattr, dict_t *xdata)
{
- int call_count = 0;
+ int call_count = 0;
call_count = afr_frame_return (frame);
- if (call_count == 0) {
+
+ if (call_count == 0)
AFR_STACK_DESTROY (frame);
- }
+
return 0;
}
+
void
afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
{
@@ -136,125 +275,109 @@ afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_local_t *new_local = NULL;
afr_private_t *priv = NULL;
- dict_t **xattr = NULL;
+ dict_t *xattr = NULL;
int32_t **changelog = NULL;
int i = 0;
- GF_UNUSED int op_errno = 0;
+ int idx = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
local = frame->local;
priv = this->private;
new_frame = copy_frame (frame);
- if (!new_frame) {
+ if (!new_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
- new_local = new_frame->local;
+ new_local = AFR_FRAME_INIT (new_frame, op_errno);
+ if (!new_local)
+ goto out;
+
changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
if (!changelog)
goto out;
- xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
- gf_afr_mt_dict_t);
- if (!xattr)
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_errno[i])
- continue;
- xattr[i] = dict_new ();
- if (!xattr[i])
- goto out;
- }
+ xattr = dict_new ();
+ if (!xattr)
+ goto out;
+
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- afr_prepare_new_entry_pending_matrix (changelog,
- afr_is_errno_set,
- local->child_errno,
- &local->cont.dir_fop.buf,
- priv->child_count);
+ pending = alloca0 (priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count ++;
+ continue;
+ }
+
+ changelog[i][idx] = hton32(1);
+ pending[i] = 1;
+ }
new_local->pending = changelog;
uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
- new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
- new_local->call_count = local->success_count;
+ new_local->loc.inode = inode_ref (local->inode);
+
+
+ afr_set_pending_dict (priv, xattr, changelog);
+
+ new_local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_errno[i])
+ if (pending[i])
continue;
- afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
(void *) (long) i, priv->children[i],
priv->children[i]->fops->xattrop,
&new_local->loc, GF_XATTROP_ADD_ARRAY,
- xattr[i], NULL);
+ xattr, NULL);
+ if (!--call_count)
+ break;
}
+
new_frame = NULL;
out:
if (new_frame)
AFR_STACK_DESTROY (new_frame);
- afr_xattr_array_destroy (xattr, priv->child_count);
+ if (xattr)
+ dict_unref (xattr);
return;
}
-gf_boolean_t
-afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
-{
- glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
- int i = 0;
-
- for (i = 0; fops[i] != GF_FOP_NULL; i++) {
- if (fop == fops[i])
- return _gf_true;
- }
- return _gf_false;
-}
void
-afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+afr_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
local = frame->local;
priv = this->private;
if (local->op_ret < 0)
- goto out;
+ return;
- if (local->success_count == priv->child_count)
- goto out;
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD)
+ return;
- if (!afr_is_new_entry_changelog_needed (local->op))
- goto out;
+ pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT (local->transaction.failed_subvols,
+ priv->child_count);
+
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
afr_mark_new_entry_changelog (frame, this);
-out:
return;
}
-void
-afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
-
- if (local->cont.dir_fop.inode == NULL)
- goto done;
- afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.dir_fop.buf.ia_gfid);
-done:
- local->transaction.unwind (frame, this);
- afr_dir_fop_mark_entry_pending_changelog (frame, this);
- local->transaction.resume (frame, this);
-}
/* {{{ create */
@@ -266,26 +389,16 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.dir_fop.inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- local->xdata_rsp);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+
+ AFR_STACK_UNWIND (create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -297,175 +410,79 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret > -1) {
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- fd_ctx->flags = local->cont.create.flags;
-
- if (local->success_count == 0) {
- if (xdata)
- local->xdata_rsp = dict_ref(xdata);
- }
- }
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, inode, buf,
- preparent, postparent, NULL, NULL);
- }
-
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+afr_create_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->umask,
- local->cont.create.fd,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_create_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create,
+ &local->loc, local->cont.create.flags,
+ local->cont.create.mode, local->umask,
+ local->cont.create.fd, local->xdata_req);
return 0;
}
int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(create,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_create_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -492,15 +509,13 @@ afr_create (call_frame_t *frame, xlator_t *this,
goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -516,25 +531,14 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.dir_fop.inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -545,131 +549,72 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- int call_count = -1;
- int child_index = -1;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, inode, buf,
- preparent, postparent, NULL, NULL);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_mknod_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev, local->umask,
+ local->xdata_req);
return 0;
}
-
int
afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev, mode_t umask, dict_t *params)
+ dev_t dev, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(mknod,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mknod_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -692,19 +637,17 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int_lock->lockee_count++;
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -721,25 +664,14 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.dir_fop.inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -750,130 +682,71 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- int call_count = -1;
- int child_index = -1;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, inode, buf,
- preparent, postparent, NULL, NULL);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask,
+ local->xdata_req);
return 0;
}
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(mkdir,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.mkdir.mode = mode;
local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_MKDIR;
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_mkdir_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -896,20 +769,17 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
int_lock->lockee_count++;
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -926,25 +796,14 @@ afr_link_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.dir_fop.inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -955,127 +814,70 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- int call_count = -1;
- int child_index = -1;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, inode, buf,
- preparent, postparent, NULL, NULL);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_link_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc, local->xdata_req);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(link,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
+
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (newloc->parent);
+
if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_LINK;
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
+
+ local->transaction.wind = afr_link_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_link_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
@@ -1098,18 +900,17 @@ afr_link (call_frame_t *frame, xlator_t *this,
int_lock->lockee_count++;
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
@@ -1126,25 +927,14 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.dir_fop.inode,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1155,132 +945,71 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- int call_count = -1;
- int child_index = -1;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, inode, buf,
- preparent, postparent, NULL, NULL);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->umask,
- local->xdata_req);
-
- if (!--call_count)
- break;
-
- }
- }
-
- return 0;
-}
-
-
-int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc,
+ local->umask, local->xdata_req);
return 0;
}
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
+afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(symlink,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
local->cont.symlink.linkpath = gf_strdup (linkpath);
local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_SYMLINK;
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_symlink_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -1303,19 +1032,17 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
int_lock->lockee_count++;
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1331,26 +1058,16 @@ afr_rename_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.dir_fop.buf,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- &local->cont.dir_fop.prenewparent,
- &local->cont.dir_fop.postnewparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
return 0;
}
@@ -1362,131 +1079,72 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *prenewparent, struct iatt *postnewparent,
dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
- local->op_errno = op_errno;
- local->child_errno[child_index] = op_errno;
-
- if (op_ret > -1)
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, NULL, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc, NULL);
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename,
+ &local->loc, &local->newloc, local->xdata_req);
return 0;
}
int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
+ int op_errno = ENOMEM;
int nlockee = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
QUORUM_CHECK(rename,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
op_errno = ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+ local->inode = inode_ref (oldloc->inode);
+ local->parent = inode_ref (oldloc->parent);
+ local->parent2 = inode_ref (newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_RENAME;
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rename_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
@@ -1536,20 +1194,17 @@ afr_rename (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (rename, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -1565,23 +1220,13 @@ afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1591,123 +1236,69 @@ afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, NULL, NULL,
- preparent, postparent, NULL, NULL);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc, local->xflag,
- local->xdata_req);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink,
+ &local->loc, local->xflag, local->xdata_req);
return 0;
}
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
+int
+afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(unlink,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
loc_copy (&local->loc, loc);
local->xflag = xflag;
+
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
+
if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_UNLINK;
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_unlink_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -1730,19 +1321,16 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
int_lock->lockee_count++;
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
- NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1760,23 +1348,13 @@ afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.dir_fop.preparent,
- &local->cont.dir_fop.postparent,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
return 0;
}
@@ -1786,130 +1364,71 @@ afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
- local->op_errno = op_errno;
- local->child_errno[child_index] = op_errno;
- if (op_ret > -1)
- __dir_entry_fop_common_cbk (frame, child_index, this,
- op_ret, op_errno, NULL, NULL,
- preparent, postparent, NULL,
- NULL);
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_dir_fop_done (frame, this);
-
- return 0;
+ return __afr_dir_write_cbk (frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags, local->xdata_req);
return 0;
}
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
+afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
+ int op_errno = ENOMEM;
int nlockee = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
QUORUM_CHECK(rmdir,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
- local->cont.rmdir.flags = flags;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+ local->parent = inode_ref (loc->parent);
+
+ local->cont.rmdir.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_RMDIR;
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_rmdir_unwind;
ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
@@ -1944,18 +1463,16 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 0cfebcb..01e078c 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -35,241 +35,153 @@
#include "compat-errno.h"
#include "compat.h"
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
+#include "afr-transaction.h"
+
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.access.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask,
- NULL);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- }
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
+int
+afr_access_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- AFR_SBRAIN_CHECK_LOC (loc, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (access, frame, local->op_ret,
+ local->op_errno, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access,
+ &local->loc, local->cont.access.mask,
+ local->xdata_req);
+ return 0;
+}
+int
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int mask, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.access.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ local->op = GF_FOP_ACCESS;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->access,
- loc, mask, xdata);
+ afr_read_txn (frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
return 0;
}
-
/* }}} */
/* {{{ stat */
-int32_t
+int
afr_stat_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- read_child = (long) cookie;
+ afr_local_t *local = NULL;
local = frame->local;
- if (op_ret == -1) {
- last_index = &local->cont.stat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- unwind = 0;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->stat,
- &local->loc, NULL);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- }
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+int
+afr_stat_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- AFR_SBRAIN_CHECK_LOC (loc, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->stat,
+ &local->loc, local->xdata_req);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+int
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.stat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_STAT;
+ loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc, xdata);
+ afr_read_txn (frame, this, loc->inode, afr_stat_wind,
+ AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -279,52 +191,49 @@ out:
/* {{{ fstat */
-int32_t
+int
afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.fstat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- unwind = 0;
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fstat,
- local->fd, NULL);
- }
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- }
- return 0;
+int
+afr_fstat_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno,
+ 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fstat,
+ local->fd, local->xdata_req);
+ return 0;
}
@@ -332,68 +241,26 @@ int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = 0;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- VALIDATE_OR_GOTO (fd->inode, out);
-
- AFR_SBRAIN_CHECK_FD (fd, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+ afr_fix_open (fd, this);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.fstat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- local->fd = fd_ref (fd);
-
- afr_open_fd_fix (fd, this);
+ afr_read_txn (frame, this, fd->inode, afr_fstat_wind,
+ AFR_DATA_TRANSACTION);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd, xdata);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -402,117 +269,77 @@ out:
/* {{{ readlink */
-int32_t
+int
afr_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
- local = frame->local;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- read_child = (long) cookie;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- if (op_ret == -1) {
- last_index = &local->cont.readlink.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readlink,
- &local->loc,
- local->cont.readlink.size, NULL);
- }
-
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
- xdata);
- }
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno,
+ buf, sbuf, xdata);
+ return 0;
+}
- return 0;
+int
+afr_readlink_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readlink, frame, local->op_ret,
+ local->op_errno, 0, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink,
+ &local->loc, local->cont.readlink.size,
+ local->xdata_req);
+ return 0;
}
-int32_t
+int
afr_readlink (call_frame_t *frame, xlator_t *this,
loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ afr_local_t * local = NULL;
int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
-
- AFR_SBRAIN_CHECK_LOC (loc, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readlink.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_READLINK;
loc_copy (&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- local->cont.readlink.size = size;
-
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readlink,
- loc, size, xdata);
+ afr_read_txn (frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
+out:
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+
+ return 0;
}
@@ -550,7 +377,7 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void
-__filter_xattrs (dict_t *dict)
+afr_filter_xattrs (dict_t *dict)
{
struct list_head keys = {0,};
struct _xattr_key *key = NULL;
@@ -571,59 +398,56 @@ __filter_xattrs (dict_t *dict)
}
-
-int32_t
+int
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
local = frame->local;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ if (dict)
+ afr_filter_xattrs (dict);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- }
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
+
+int
+afr_getxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr,
+ &local->loc, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
+}
+
+
int32_t
afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
dict_t *dict, dict_t *xdata)
@@ -659,7 +483,7 @@ afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
if (op_ret == -1)
- local->child_errno[cky] = op_errno;
+ local->replies[cky].op_errno = op_errno;
if (!local->dict)
local->dict = dict_new ();
@@ -710,12 +534,10 @@ unlock:
unwind:
// Updating child_errno with more recent 'events'
- local->child_errno[cky] = op_errno;
- op_errno = afr_resultant_errno_get (NULL, local->child_errno,
- priv->child_count);
+ op_errno = afr_final_errno (local, priv);
+
AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
xdata);
-
if (xattr)
dict_unref (xattr);
}
@@ -749,7 +571,7 @@ afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
if (op_ret == -1)
- local->child_errno[cky] = op_errno;
+ local->replies[cky].op_errno = op_errno;
if (!local->dict)
local->dict = dict_new ();
@@ -800,9 +622,8 @@ unlock:
unwind:
// Updating child_errno with more recent 'events'
- local->child_errno[cky] = op_errno;
- op_errno = afr_resultant_errno_get (NULL, local->child_errno,
- priv->child_count);
+ op_errno = afr_final_errno (local, priv);
+
AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
if (xattr)
@@ -1411,7 +1232,7 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
}
if (!strcmp (name, GF_XATTR_PATHINFO_KEY) ||
- !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
+ !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) {
if (is_fgetxattr) {
*cbk = afr_fgetxattr_pathinfo_cbk;
} else {
@@ -1442,18 +1263,16 @@ out:
}
static void
-afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
- const char *name, loc_t *loc,
- fop_getxattr_cbk_t cbk)
+afr_getxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- xlator_t **children = NULL;
int i = 0;
int call_count = 0;
priv = this->private;
- children = priv->children;
local = frame->local;
//local->call_count set in afr_local_init
@@ -1465,8 +1284,8 @@ afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i, children[i],
- children[i]->fops->getxattr,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->getxattr,
loc, name, NULL);
if (!--call_count)
break;
@@ -1481,41 +1300,41 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
xlator_t **children = NULL;
- int call_child = 0;
afr_local_t *local = NULL;
xlator_list_t *trav = NULL;
xlator_t **sub_volumes = NULL;
int i = 0;
int32_t op_errno = 0;
- int32_t read_child = -1;
int ret = -1;
fop_getxattr_cbk_t cbk = NULL;
int afr_xtime_gauge[MCNT_MAX] = {0,};
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
children = priv->children;
- AFR_SBRAIN_CHECK_LOC (loc, out);
+ loc_copy (&local->loc, loc);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local->op = GF_FOP_GETXATTR;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- loc_copy (&local->loc, loc);
if (!name)
goto no_name;
local->cont.getxattr.name = gf_strdup (name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
if (!strncmp (name, AFR_XATTR_PREFIX,
strlen (AFR_XATTR_PREFIX))) {
gf_log (this->name, GF_LOG_INFO,
@@ -1559,8 +1378,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
* collect information from all childs
*/
if (afr_is_special_xattr (name, &cbk, 0)) {
- afr_getxattr_frm_all_children (this, frame, name,
- loc, cbk);
+ afr_getxattr_all_subvols (this, frame, name, loc, cbk);
return 0;
}
@@ -1615,28 +1433,9 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
no_name:
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->getxattr,
- loc, name, xdata);
+ afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
ret = 0;
out:
@@ -1653,76 +1452,60 @@ afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
+ local = frame->local;
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fgetxattr,
- local->fd,
- local->cont.getxattr.name,
- NULL);
- }
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ if (dict)
+ afr_filter_xattrs (dict);
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
- xdata);
- }
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ return 0;
}
-int32_t
-afr_fgetxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
-
+/*
+ * Wind step of the fgetxattr read path for one subvolume.
+ *
+ * subvol == -1: no subvolume is available to try; unwind fgetxattr to the
+ * caller with the op_ret/op_errno saved in the local (afr_fgetxattr_cbk
+ * stores the last failure there before asking for the next subvolume).
+ *
+ * Otherwise wind fgetxattr to children[subvol], passing the subvol index as
+ * the cookie so afr_fgetxattr_cbk can hand it to afr_read_txn_continue().
+ * STACK_WIND_COOKIE takes the callback first and the cookie second, the
+ * same order used by the other *_wind functions in this file.
+ */
+int
+afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ local->xdata_req);
+ return 0;
}
+
static void
-afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
- const char *name, fd_t *fd,
- fop_fgetxattr_cbk_t cbk)
+afr_fgetxattr_all_subvols (xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- xlator_t **children = NULL;
int i = 0;
int call_count = 0;
priv = this->private;
- children = priv->children;
local = frame->local;
//local->call_count set in afr_local_init
@@ -1735,9 +1518,10 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, cbk,
(void *) (long) i,
- children[i],
- children[i]->fops->fgetxattr,
- fd, name, NULL);
+ priv->children[i],
+ priv->children[i]->fops->fgetxattr,
+ local->fd, local->cont.getxattr.name,
+ NULL);
if (!--call_count)
break;
}
@@ -1746,42 +1530,30 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
return;
}
-int32_t
+
+int
afr_fgetxattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
afr_local_t *local = NULL;
- int32_t op_ret = -1;
int32_t op_errno = 0;
- int32_t read_child = -1;
fop_fgetxattr_cbk_t cbk = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- AFR_SBRAIN_CHECK_FD (fd, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- frame->local = local;
-
- op_ret = afr_local_init (local, priv, &op_errno);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
+ local->op = GF_FOP_FGETXATTR;
local->fd = fd_ref (fd);
- if (name)
+ if (name) {
local->cont.getxattr.name = gf_strdup (name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
/* pathinfo gets handled only in getxattr(), but we need to handle
* lockinfo.
@@ -1789,42 +1561,19 @@ afr_fgetxattr (call_frame_t *frame, xlator_t *this,
* collect information from all children.
*/
if (afr_is_special_xattr (name, &cbk, 1)) {
- afr_fgetxattr_frm_all_children (this, frame, name,
- fd, cbk);
+ afr_fgetxattr_all_subvols (this, frame, cbk);
return 0;
}
+ afr_fix_open (fd, this);
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fgetxattr,
- fd, name, xdata);
+ afr_read_txn (frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
- NULL);
- }
+ AFR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
return 0;
}
@@ -1833,144 +1582,84 @@ out:
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
-
-int32_t
+int
afr_readv_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t *fresh_children = NULL;
- int32_t read_child = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- if (op_ret == -1) {
- last_index = &local->cont.readv.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
+ afr_read_txn_continue (frame, this, (long) cookie);
+ return 0;
+ }
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset,
- local->cont.readv.flags,
- NULL);
- }
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ return 0;
+}
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref, xdata);
- }
- return 0;
+int
+afr_readv_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND (readv, frame, local->op_ret, local->op_errno,
+ 0, 0, 0, 0, 0);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags,
+ local->xdata_req);
+ return 0;
}
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+int
+afr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- children = priv->children;
- AFR_SBRAIN_CHECK_FD (fd, out);
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readv.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- local->fd = fd_ref (fd);
-
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
- local->cont.readv.flags = flags;
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref (fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
- afr_open_fd_fix (fd, this);
+ afr_fix_open (fd, this);
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset, flags, xdata);
+ afr_read_txn (frame, this, fd->inode, afr_readv_wind,
+ AFR_DATA_TRANSACTION);
- ret = 0;
-out:
- if (ret < 0) {
- AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
- NULL, NULL);
- }
return 0;
+out:
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index d62847d..3dacfc8 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -37,46 +37,128 @@
#include "afr.h"
#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
+//#include "afr-self-heal-common.h"
-void
-__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
- xlator_t *this, int32_t *op_ret, int32_t *op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+
+static void
+__afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->inode) {
+ if (local->transaction.type == AFR_METADATA_TRANSACTION)
+ read_subvol = afr_metadata_subvol_get (local->inode, this,
+ NULL, NULL);
+ else
+ read_subvol = afr_data_subvol_get (local->inode, this,
+ NULL, NULL);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ afr_inode_read_subvol_reset (local->inode, this);
+ continue;
+ }
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ }
+ }
+}
+
+
+static void
+__afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
local = frame->local;
- if (afr_fop_failed (*op_ret, *op_errno)) {
- local->child_errno[child_index] = *op_errno;
-
- switch (local->op) {
- case GF_FOP_TRUNCATE:
- case GF_FOP_FTRUNCATE:
- if (*op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this,
- child_index);
- break;
- default:
- afr_transaction_fop_failed (frame, this, child_index);
- break;
- }
- local->op_errno = *op_errno;
- goto out;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref (xdata);
+ } else {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ return;
+}
+
+
+static int
+__afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
}
+ UNLOCK (&frame->lock);
- if ((local->success_count == 0) || (read_child == child_index)) {
- local->op_ret = *op_ret;
- if (prebuf)
- local->cont.inode_wfop.prebuf = *prebuf;
- if (postbuf)
- local->cont.inode_wfop.postbuf = *postbuf;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ __afr_inode_write_finalize (frame, this);
+
+ if (afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
+
+ local->transaction.resume (frame, this);
}
- local->success_count++;
-out:
- return;
+ return 0;
}
/* {{{ writev */
@@ -94,6 +176,8 @@ afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
dst_local->op_errno = src_local->op_errno;
dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref (src_local->xdata_rsp);
}
void
@@ -106,26 +190,9 @@ afr_writev_unwind (call_frame_t *frame, xlator_t *this)
local->op_ret, local->op_errno,
&local->cont.inode_wfop.prebuf,
&local->cont.inode_wfop.postbuf,
- NULL);
+ local->xdata_rsp);
}
-call_frame_t*
-afr_transaction_detach_fop_frame (call_frame_t *frame)
-{
- afr_local_t * local = NULL;
- call_frame_t *fop_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- fop_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- return fop_frame;
-}
int
afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
@@ -173,82 +240,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t *priv = NULL;
call_frame_t *fop_frame = NULL;
int child_index = (long) cookie;
int call_count = -1;
- int read_child = 0;
- int ret = 0;
+ int ret = 0;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, prebuf, postbuf,
- xdata);
-
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
-
-
- /* stage the best case return value for unwind */
- if ((local->success_count == 0) || (op_ret > local->op_ret)) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
-
- if (op_ret != -1) {
- if (xdata) {
- ret = dict_get_uint32 (xdata,
- GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- if ((ret == 0) &&
- (open_fd_count > local->open_fd_count)) {
- local->open_fd_count = open_fd_count;
- local->update_open_fd_count = _gf_true;
- }
-
- write_is_append = 0;
- ret = dict_get_uint32 (xdata,
- GLUSTERFS_WRITE_IS_APPEND,
- &write_is_append);
- if (ret || !write_is_append)
- local->append_write = _gf_false;
- }
-
+ __afr_inode_write_fill (frame, this, child_index, op_ret,
+ op_errno, prebuf, postbuf, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if (ret == -1)
+ goto unlock;
+ if ((open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
}
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
-
- if (local->update_open_fd_count)
- afr_handle_open_fd_count (frame, this);
-
- if (!local->stable_write && !local->append_write)
+ if (!local->stable_write && !local->append_write)
/* An appended write removes the necessity to
fsync() the file. This is because self-heal
has the logic to check for larger file when
the xattrs are not reliably pointing at
a stale file.
*/
- afr_fd_report_unstable_write (this, local->fd);
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
afr_writev_handle_short_writes (frame, this);
- if (afr_any_fops_failed (local, priv)) {
+
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!afr_txn_nothing_failed (frame, this)) {
//Don't unwind until post-op is complete
local->transaction.resume (frame, this);
} else {
@@ -272,91 +317,23 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+
int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+afr_writev_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
- dict_t *xdata = NULL;
- GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
- local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
- gf_afr_mt_reply_t);
- if (!local->replies) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- local->transaction.unwind(frame, this);
- local->transaction.resume(frame, this);
- return 0;
- }
-
- xdata = dict_new ();
- if (xdata) {
- ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
- sizeof (uint32_t));
- ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
- 0);
- /* Set append_write to be true speculatively. If on any
- server it turns not be true, we unset it in the
- callback.
- */
- local->append_write = _gf_true;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.flags,
- local->cont.writev.iobref,
- xdata);
-
- if (!--call_count)
- break;
- }
- }
-
- if (xdata)
- dict_unref (xdata);
-
- return 0;
-}
-
-
-int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev,
+ local->fd, local->cont.writev.vector,
+ local->cont.writev.count, local->cont.writev.offset,
+ local->cont.writev.flags, local->cont.writev.iobref,
+ local->xdata_req);
return 0;
}
@@ -366,29 +343,29 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
{
call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
+ int ret = -1;
+ int op_errno = ENOMEM;
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
+ local = frame->local;
transaction_frame->local = local;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ frame->local = NULL;
- local->op = GF_FOP_WRITE;
+ if (!AFR_FRAME_INIT (frame, op_errno))
+ goto out;
- local->success_count = 0;
+ local->op = GF_FOP_WRITE;
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_transaction_writev_unwind;
local->transaction.main_frame = frame;
+
if (local->fd->flags & O_APPEND) {
/*
* Backend vfs ignores the 'offset' for append mode fd so
@@ -405,179 +382,86 @@ afr_do_writev (call_frame_t *frame, xlator_t *this)
local->cont.writev.count);
}
- op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-static void
-afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
-{
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- char *reason = NULL;
- int32_t op_errno = 0;
- int ret = 0;
-
- if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
- "fd: %p, inode: %p", fd,
- fd ? fd->inode : NULL);
- goto out;
- }
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
- ret = afr_local_init (local, this->private, &op_errno);
- if (ret < 0)
- goto out;
-
- local->loc.inode = inode_ref (fd->inode);
- ret = loc_path (&local->loc, NULL);
- if (ret < 0)
- goto out;
-
- sh = &local->self_heal;
- sh->do_metadata_self_heal = _gf_true;
- if (fd->inode->ia_type == IA_IFREG)
- sh->do_data_self_heal = _gf_true;
- else if (fd->inode->ia_type == IA_IFDIR)
- sh->do_entry_self_heal = _gf_true;
-
- reason = "subvolume came online";
- afr_launch_self_heal (frame, this, fd->inode, _gf_true,
- fd->inode->ia_type, reason, NULL, NULL);
- return;
-out:
- AFR_STACK_DESTROY (frame);
-}
-
-void
-afr_open_fd_fix (fd_t *fd, xlator_t *this)
-{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- size_t need_open_count = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!afr_is_fd_fixable (fd))
- goto out;
-
- fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx)
- goto out;
-
- LOCK (&fd->lock);
- {
- if (fd_ctx->up_count < priv->up_count) {
- need_self_heal = _gf_true;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
- }
-
- need_open = alloca (priv->child_count * sizeof (*need_open));
- for (i = 0; i < priv->child_count; i++) {
- need_open[i] = 0;
- if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
- continue;
-
- if (!priv->child_up[i])
- continue;
-
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
-
- need_open[i] = 1;
- need_open_count++;
- }
- }
- UNLOCK (&fd->lock);
- if (ret)
- goto out;
-
- if (need_self_heal)
- afr_trigger_open_fd_self_heal (fd, this);
-
- if (!need_open_count)
- goto out;
-
- afr_fix_open (this, fd, need_open_count, need_open);
-out:
- return;
-}
int
afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
priv = this->private;
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
- }
-
QUORUM_CHECK(writev,out);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.writev.vector = iov_dup (vector, count);
+ local->cont.writev.vector = iov_dup (vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
local->cont.writev.count = count;
local->cont.writev.offset = offset;
local->cont.writev.flags = flags;
local->cont.writev.iobref = iobref_ref (iobref);
- local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set append_write to be true speculatively. If on any
+ server it turns out not to be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
/* detect here, but set it in writev_wind_cbk *after* the unstable
write is performed
*/
local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- afr_open_fd_fix (fd, this);
+ afr_fix_open (fd, this);
afr_do_writev (frame, this);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -595,22 +479,13 @@ afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
@@ -620,96 +495,32 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
-
- local = frame->local;
-
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ local = frame->local;
- if (op_ret != -1) {
- if (prebuf->ia_size != postbuf->ia_size)
- local->stable_write = _gf_false;
- }
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, prebuf, postbuf,
- xdata);
- }
- UNLOCK (&frame->lock);
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->stable_write && afr_txn_nothing_failed (frame, this))
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_truncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
- local->stable_write = _gf_true;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate,
+ &local->loc, local->cont.truncate.offset,
+ local->xdata_req);
return 0;
}
@@ -721,56 +532,60 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int ret = -1;
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(truncate,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_truncate_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_TRUNCATE;
local->transaction.main_frame = frame;
local->transaction.start = offset;
local->transaction.len = 0;
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -788,21 +603,13 @@ afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
@@ -812,122 +619,75 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int read_child = 0;
-
- local = frame->local;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (op_ret != -1) {
- if (prebuf->ia_size != postbuf->ia_size)
- local->stable_write = _gf_false;
- }
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, prebuf, postbuf,
- xdata);
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
- call_count = afr_frame_return (frame);
+ local = frame->local;
- if (call_count == 0) {
- if (local->stable_write && afr_txn_nothing_failed (frame, this))
- local->transaction.unwind (frame, this);
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
- local->call_count = call_count;
- local->stable_write = _gf_true;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd,
- local->cont.ftruncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate,
+ local->fd, local->cont.ftruncate.offset,
+ local->xdata_req);
return 0;
}
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
-
- local->transaction.unwind (frame, this);
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
+ QUORUM_CHECK(ftruncate,out);
- return 0;
-}
+ transaction_frame = copy_frame (frame);
+ /* Test the copy that was just made, not the caller's frame: the copy
+ is what gets handed to AFR_FRAME_INIT on the next statement. */
+ if (!transaction_frame)
+ goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
-int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
-{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local = frame->local;
+ if (!local->xdata_req)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
local->op = GF_FOP_FTRUNCATE;
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_ftruncate_unwind;
local->transaction.main_frame = frame;
@@ -935,69 +695,21 @@ afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.ftruncate.offset;
local->transaction.len = 0;
- op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
-
- return 0;
-}
-
-
-int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- QUORUM_CHECK(ftruncate,out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
- local->cont.ftruncate.offset = offset;
-
- local->fd = fd_ref (fd);
-
- afr_open_fd_fix (fd, this);
-
- afr_do_ftruncate (frame, this);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ /* Failure path only (success returns above): drop the copied
+ transaction frame before failing the fop, as the other inode-write
+ fops in this file do. */
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1009,173 +721,92 @@ out:
int
afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ local->xdata_rsp);
return 0;
}
int
afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
+ int op_ret, int op_errno,
struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, preop, postop,
- xdata);
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
}
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_setattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr,
+ &local->loc, &local->cont.setattr.in_buf,
+ local->cont.setattr.valid, local->xdata_req);
return 0;
}
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
QUORUM_CHECK(setattr,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
local->cont.setattr.in_buf = *buf;
local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_setattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_SETATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
@@ -1183,18 +814,16 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1208,22 +837,13 @@ afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
@@ -1233,149 +853,72 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, preop, postop,
- xdata);
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ preop, postop, xdata);
}
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr,
+ local->fd, &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid, local->xdata_req);
return 0;
}
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
priv = this->private;
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
- }
-
QUORUM_CHECK(fsetattr,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
local->cont.fsetattr.in_buf = *buf;
local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_fsetattr_unwind;
local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FSETATTR;
- afr_open_fd_fix (fd, this);
+ afr_fix_open (fd, this);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
@@ -1383,18 +926,16 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
@@ -1410,19 +951,12 @@ afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ AFR_STACK_UNWIND (setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
@@ -1431,95 +965,32 @@ int
afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- __inode_write_fop_cbk (frame, child_index, -1, this,
- &op_ret, &op_errno, NULL, NULL,
- xdata);
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
}
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr,
+ &local->loc, local->cont.setxattr.dict,
+ local->cont.setxattr.flags, local->xdata_req);
return 0;
}
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1527,59 +998,60 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
int ret = -1;
int op_errno = EINVAL;
- VALIDATE_OR_GOTO (this, out);
-
GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
op_errno, out);
GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
-
priv = this->private;
QUORUM_CHECK(setxattr,out);
+
+ /* op_errno is declared as EINVAL for the internal-xattr checks above.
+ From here until afr_transaction() the failures are allocation
+ failures (frame copy, xdata_req), so report ENOMEM like the sibling
+ fops do. NOTE(review): AFR_FRAME_INIT is passed op_errno and
+ presumably overrides it on its own failure - confirm. */
+ op_errno = ENOMEM;
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
local->cont.setxattr.dict = dict_ref (dict);
local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_setxattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
+ local->op = GF_FOP_SETXATTR;
+
ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1595,19 +1067,12 @@ afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (fsetxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ AFR_STACK_UNWIND (fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
@@ -1616,94 +1081,30 @@ int
afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
-
- __inode_write_fop_cbk (frame, child_index, -1, this,
- &op_ret, &op_errno, NULL, NULL,
- xdata);
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
}
int
-afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetxattr,
- local->fd,
- local->cont.fsetxattr.dict,
- local->cont.fsetxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr,
+ local->fd, local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags, local->xdata_req);
return 0;
}
int
-afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int
afr_fsetxattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
{
@@ -1711,11 +1112,7 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ int op_errno = ENOMEM;
GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
op_errno, out);
@@ -1725,36 +1122,36 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
- }
-
QUORUM_CHECK(fsetxattr,out);
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
-
- transaction_frame->local = local;
- local->op_ret = -1;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
local->cont.fsetxattr.dict = dict_ref (dict);
local->cont.fsetxattr.flags = flags;
- local->transaction.fop = afr_fsetxattr_wind;
- local->transaction.done = afr_fsetxattr_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_fsetxattr_unwind;
local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FSETXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
@@ -1762,18 +1159,16 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1791,19 +1186,12 @@ afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ AFR_STACK_UNWIND (removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
@@ -1812,88 +1200,25 @@ int
afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- __inode_write_fop_cbk (frame, child_index, -1, this,
- &op_ret, &op_errno, NULL, NULL,
- xdata);
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
}
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr,
+ &local->loc, local->cont.removexattr.name,
+ local->xdata_req);
return 0;
}
@@ -1906,9 +1231,7 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
afr_local_t *local = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
+ int op_errno = ENOMEM;
GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
name, op_errno, out);
@@ -1916,34 +1239,37 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
priv = this->private;
QUORUM_CHECK(removexattr,out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
local->cont.removexattr.name = gf_strdup (name);
+ /* The duplicated name is later wound to every child in
+ afr_removexattr_wind; bail out here rather than wind NULL.
+ NOTE(review): assumes gf_strdup returns NULL on allocation
+ failure - confirm against mem-pool.h. */
+ if (!local->cont.removexattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_removexattr_unwind;
loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
+
+ local->op = GF_FOP_REMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
@@ -1951,18 +1277,16 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
if (ret < 0) {
- op_errno = -ret;
- goto out;
+ op_errno = -ret;
+ goto out;
}
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
@@ -1975,19 +1299,12 @@ afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (fremovexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ AFR_STACK_UNWIND (fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
return 0;
}
@@ -1996,105 +1313,38 @@ int
afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- __inode_write_fop_cbk (frame, child_index, -1, this,
- &op_ret, &op_errno, NULL, NULL,
- xdata);
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ NULL, NULL, xdata);
}
-int32_t
-afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fremovexattr,
- local->fd,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr,
+ local->fd, local->cont.removexattr.name,
+ local->xdata_req);
return 0;
}
int
-afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
+ int op_errno = ENOMEM;
GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
name, op_errno, out);
@@ -2102,64 +1352,59 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this,
GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
- }
+ priv = this->private;
QUORUM_CHECK(fremovexattr, out);
transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
+ if (!transaction_frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- op_errno = -ret;
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
local->cont.removexattr.name = gf_strdup (name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- local->transaction.fop = afr_fremovexattr_wind;
- local->transaction.done = afr_fremovexattr_done;
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_fremovexattr_unwind;
local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
+
+ local->op = GF_FOP_FREMOVEXATTR;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
- op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- op_ret = 0;
+ return 0;
out:
- if (op_ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
-static int
+
+int
afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
@@ -2167,147 +1412,88 @@ afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
-static int
+
+int
afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, prebuf, postbuf,
- xdata);
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-static int
-afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+
+int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fallocate,
- local->fd,
- local->cont.fallocate.mode,
- local->cont.fallocate.offset,
- local->cont.fallocate.len,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate,
+ local->fd, local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
return 0;
}
-static int
-afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
+ afr_private_t *priv = NULL;
+ call_frame_t *transaction_frame = NULL;
afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
+ QUORUM_CHECK(fallocate,out);
- AFR_STACK_DESTROY (frame);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- return 0;
-}
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
-static int
-afr_do_fallocate (call_frame_t *frame, xlator_t *this)
-{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- local = frame->local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame->local = local;
- frame->local = NULL;
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_FALLOCATE;
- local->transaction.fop = afr_fallocate_wind;
- local->transaction.done = afr_fallocate_done;
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_fallocate_unwind;
local->transaction.main_frame = frame;
@@ -2315,80 +1501,29 @@ afr_do_fallocate (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.fallocate.offset;
local->transaction.len = 0;
- /* fallocate can modify the file size */
- op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- op_ret = 0;
-out:
- if (op_ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
-
- return 0;
-}
-
-int
-afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_fix_open (fd, this);
- priv = this->private;
-
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- QUORUM_CHECK(fallocate,out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
- local->cont.fallocate.mode = mode;
- local->cont.fallocate.offset = offset;
- local->cont.fallocate.len = len;
-
- local->fd = fd_ref (fd);
-
- afr_open_fd_fix (fd, this);
-
- afr_do_fallocate (frame, this);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+
/* }}} */
/* {{{ discard */
-static int
+int
afr_discard_unwind (call_frame_t *frame, xlator_t *this)
{
afr_local_t * local = NULL;
@@ -2396,146 +1531,86 @@ afr_discard_unwind (call_frame_t *frame, xlator_t *this)
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- NULL);
- }
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
-static int
+
+int
afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- __inode_write_fop_cbk (frame, child_index, read_child, this,
- &op_ret, &op_errno, prebuf, postbuf,
- xdata);
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-static int
-afr_discard_wind (call_frame_t *frame, xlator_t *this)
+
+int
+afr_discard_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->discard,
- local->fd,
- local->cont.discard.offset,
- local->cont.discard.len,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard,
+ local->fd, local->cont.discard.offset,
+ local->cont.discard.len, local->xdata_req);
return 0;
}
-static int
-afr_discard_done (call_frame_t *frame, xlator_t *this)
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
+ QUORUM_CHECK(discard, out);
- AFR_STACK_DESTROY (frame);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- return 0;
-}
+ local = AFR_FRAME_INIT (transaction_frame, op_errno); /* local must sit on the frame given to afr_transaction(), as in afr_fallocate */
+ if (!local)
+ goto out;
-static int
-afr_do_discard (call_frame_t *frame, xlator_t *this)
-{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- local = frame->local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame->local = local;
- frame->local = NULL;
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_DISCARD;
- local->transaction.fop = afr_discard_wind;
- local->transaction.done = afr_discard_done;
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_discard_unwind;
local->transaction.main_frame = frame;
@@ -2543,316 +1618,134 @@ afr_do_discard (call_frame_t *frame, xlator_t *this)
local->transaction.start = local->cont.discard.offset;
local->transaction.len = 0;
- op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ afr_fix_open (fd, this);
- op_ret = 0;
-out:
- if (op_ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
-
- return 0;
-}
-
-int
-afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- QUORUM_CHECK(discard, out);
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
- local->cont.discard.offset = offset;
- local->cont.discard.len = len;
-
- local->fd = fd_ref (fd);
-
- afr_open_fd_fix (fd, this);
-
- afr_do_discard(frame, this);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
/* {{{ zerofill */
-static int
+int
afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
- if (main_frame) {
- AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.zerofill.prebuf,
- &local->cont.zerofill.postbuf,
- NULL);
- }
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
return 0;
}
-static int
-afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno)) {
- afr_transaction_fop_failed (frame, this, child_index);
- }
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.zerofill.prebuf = *prebuf;
- local->cont.zerofill.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.zerofill.prebuf = *prebuf;
- local->cont.zerofill.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind) {
- local->transaction.unwind (frame, this);
- }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+int
+afr_zerofill_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
}
-static int
-afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+
+int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
priv = this->private;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->zerofill,
- local->fd,
- local->cont.zerofill.offset,
- local->cont.zerofill.len,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk, (void *) (long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill,
+ local->fd, local->cont.zerofill.offset,
+ local->cont.zerofill.len, local->xdata_req);
return 0;
}
-static int
-afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
+ afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
+ QUORUM_CHECK(zerofill, out);
- AFR_STACK_DESTROY (frame);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
- return 0;
-}
+ local = AFR_FRAME_INIT (transaction_frame, op_errno); /* local must sit on the frame given to afr_transaction(), as in afr_fallocate */
+ if (!local)
+ goto out;
-static int
-afr_do_zerofill(call_frame_t *frame, xlator_t *this)
-{
- call_frame_t *transaction_frame = NULL;
- afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- local = frame->local;
+ local->fd = fd_ref (fd);
+ local->inode = inode_ref (fd->inode);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
- transaction_frame->local = local;
- frame->local = NULL;
+ if (!local->xdata_req)
+ goto out;
local->op = GF_FOP_ZEROFILL;
- local->transaction.fop = afr_zerofill_wind;
- local->transaction.done = afr_zerofill_done;
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
local->transaction.unwind = afr_zerofill_unwind;
local->transaction.main_frame = frame;
- local->transaction.start = local->cont.zerofill.offset;
- local->transaction.len = 0;
-
- op_ret = afr_transaction (transaction_frame, this,
- AFR_DATA_TRANSACTION);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
-
- op_ret = 0;
-out:
- if (op_ret < 0) {
- if (transaction_frame) {
- AFR_STACK_DESTROY (transaction_frame);
- }
- AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
-
- return 0;
-}
-
-int
-afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- if (afr_is_split_brain (this, fd->inode)) {
- op_errno = EIO;
- goto out;
- }
- QUORUM_CHECK(zerofill, out);
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = len;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_fix_open (fd, this);
- ret = afr_local_init (local, priv, &op_errno);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
if (ret < 0) {
- goto out;
+ op_errno = -ret;
+ goto out;
}
- local->cont.zerofill.offset = offset;
- local->cont.zerofill.len = len;
-
- local->fd = fd_ref (fd);
-
- afr_open_fd_fix (fd, this);
- afr_do_zerofill(frame, this);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame) {
- AFR_STACK_DESTROY (transaction_frame);
- }
- AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
- NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 060d78f..a2a758f 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -580,22 +580,6 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
return 0;
}
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
-{
- int ret = 0;
-
- ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
-
- if (ret == 0)
- ret = strcmp (b1, b2);
-
- if (ret <= 0)
- return l1;
- else
- return l2;
-}
-
int
afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
{
@@ -1213,8 +1197,7 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
case AFR_ENTRY_RENAME_TRANSACTION:
case AFR_ENTRY_TRANSACTION:
- up_count = afr_up_children_count (local->child_up,
- priv->child_count);
+ up_count = AFR_COUNT (local->child_up, priv->child_count);
int_lock->lk_call_count = int_lock->lk_expected_count
= (int_lock->lockee_count *
up_count);
@@ -1648,496 +1631,6 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
}
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes)
-{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- priv = this->private;
-
- ret = afr_fd_ctx_set (this, fd);
- if (ret)
- goto out;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to get the fd ctx");
- goto out;
- }
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- GF_ASSERT (fdctx->locked_on);
-
- memcpy (fdctx->locked_on, locked_nodes,
- priv->child_count);
-
-out:
- return ret;
-}
-
-static int
-__is_fd_saved (xlator_t *this, fd_t *fd)
-{
- afr_locked_fd_t *locked_fd = NULL;
- afr_private_t *priv = NULL;
- int found = 0;
-
- priv = this->private;
-
- list_for_each_entry (locked_fd, &priv->saved_fds, list) {
- if (locked_fd->fd == fd) {
- found = 1;
- break;
- }
- }
-
- return found;
-}
-
-static int
-__afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- int ret = 0;
-
- priv = this->private;
-
- locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
- gf_afr_mt_locked_fd);
- if (!locked_fd) {
- ret = -1;
- goto out;
- }
-
- locked_fd->fd = fd;
- INIT_LIST_HEAD (&locked_fd->list);
-
- list_add_tail (&locked_fd->list, &priv->saved_fds);
-
-out:
- return ret;
-}
-
-int
-afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->mutex);
- {
- if (__is_fd_saved (this, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p already saved", fd);
- goto unlock;
- }
-
- ret = __afr_save_locked_fd (this, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "fd=%p could not be saved", fd);
- goto unlock;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->mutex);
-
- return ret;
-}
-
-static int
-afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
-
- local = frame->local;
-
- locked_fd = local->locked_fd;
-
- STACK_DESTROY (frame->root);
- afr_local_cleanup (local, this);
-
- afr_save_locked_fd (this, locked_fd->fd);
-
- return 0;
-
-}
-
-static int
-afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
-{
- afr_fd_ctx_t *fdctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t tmp = 0;
- int i = 0;
- int source_child = -1;
- int ret = 0;
-
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fdctx->locked_on[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d", i);
- source_child = i;
- break;
- }
- }
-
-out:
- return source_child;
-
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata);
-int32_t
-afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "lock recovery failed");
- goto cleanup;
- }
-
- source_child = local->source_child;
-
- memcpy (&flock, lock, sizeof (*lock));
-
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-int
-afr_recover_lock (call_frame_t *frame, xlator_t *this,
- struct gf_flock *flock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t lock_recovery_child = 0;
-
- priv = this->private;
- local = frame->local;
-
- lock_recovery_child = local->lock_recovery_child;
-
- frame->root->lk_owner = flock->l_owner;
-
- STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
- (void *) (long) lock_recovery_child,
- priv->children[lock_recovery_child],
- priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock, NULL);
-
- return 0;
-}
-
-static int
-is_afr_lock_eol (struct gf_flock *lock)
-{
- int ret = 0;
-
- if ((lock->l_type == GF_LK_EOL))
- ret = 1;
-
- return ret;
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to get locks on fd");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Got a lock on fd");
-
- if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_INFO,
- "Reached EOL on locks on fd");
- goto cleanup;
- }
-
- afr_recover_lock (frame, this, lock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
-
- return 0;
-}
-
-static int
-afr_lock_recovery (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- priv = this->private;
- local = frame->local;
-
- fd = local->fd;
-
- source_child = afr_get_source_lock_recovery (this, fd);
- if (source_child < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not recover locks due to lock "
- "split brain");
- ret = -1;
- goto out;
- }
-
- local->source_child = source_child;
-
- /* the flock can be zero filled as we're querying incrementally
- the locks held on the fd.
- */
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
-out:
- return ret;
-}
-
-
-static int
-afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- fdctx->opened_on[child_index] = AFR_FD_OPENED;
-
-out:
- return ret;
-}
-
-int32_t
-afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- int32_t child_index = (long )cookie;
- int ret = 0;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Reopen during lock-recovery failed");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Open succeeded => proceed to recover locks");
-
- ret = afr_lock_recovery (frame, this);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Lock recovery failed");
- goto cleanup;
- }
-
- ret = afr_mark_fd_opened (this, fd, child_index);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Marking fd open failed");
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-static int
-afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- uint64_t tmp = 0;
- afr_fd_ctx_t *fdctx = NULL;
- loc_t loc = {0,};
- int32_t child_index = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (local && local->fd);
-
- ret = fd_ctx_get (local->fd, this, &tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get the context of fd",
- uuid_utoa (local->fd->inode->gfid));
- fdctx = (afr_fd_ctx_t *) (long) tmp;
- /* TODO: instead we should return from the function */
- GF_ASSERT (fdctx);
-
- child_index = local->lock_recovery_child;
-
- inode_path (local->fd->inode, NULL, (char **)&loc.path);
- loc.name = strrchr (loc.path, '/');
- loc.inode = inode_ref (local->fd->inode);
- loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
- (void *)(long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd, NULL);
-
- return 0;
-}
-
-static int
-is_fd_opened (fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, THIS, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
- ret = 1;
-
-out:
- return ret;
-}
-
-int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
-{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- int ret = -1;
- struct list_head locks_list = {0,};
- int32_t op_errno = 0;
-
-
- priv = this->private;
-
- if (list_empty (&priv->saved_fds))
- goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
-
- frame->local = local;
-
- INIT_LIST_HEAD (&locks_list);
-
- pthread_mutex_lock (&priv->mutex);
- {
- list_splice_init (&priv->saved_fds, &locks_list);
- }
- pthread_mutex_unlock (&priv->mutex);
-
- list_for_each_entry_safe (locked_fd, tmp,
- &locks_list, list) {
-
- list_del_init (&locked_fd->list);
-
- local->fd = fd_ref (locked_fd->fd);
- local->lock_recovery_child = child_index;
- local->locked_fd = locked_fd;
-
- if (!is_fd_opened (locked_fd->fd, child_index)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting open before lock "
- "recovery");
- afr_lock_recovery_preopen (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting lock recovery "
- "without a preopen");
- afr_lock_recovery (frame, this);
- }
- }
-
-out:
- if ((ret < 0) && frame)
- AFR_STACK_DESTROY (frame);
- return ret;
-}
-
-int
afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 73594f2..05df90c 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -41,10 +41,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_shd_event_t,
gf_afr_mt_time_t,
gf_afr_mt_pos_data_t,
- gf_afr_mt_reply_t,
- gf_afr_mt_stats_t,
- gf_afr_mt_shd_crawl_event_t,
- gf_afr_mt_uint64_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 643a5d6..f86aa7f 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -43,85 +43,29 @@
#include "afr-dir-read.h"
#include "afr-dir-write.h"
#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-int
-afr_stale_child_up (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int up = -1;
-
- priv = this->private;
-
- if (!local->fresh_children)
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
- if (priv->child_count == afr_get_children_count (local->fresh_children,
- priv->child_count))
- goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- if (afr_is_child_present (local->fresh_children,
- priv->child_count, i))
- continue;
- up = i;
- break;
- }
-out:
- return up;
-}
-
-void
-afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- int st_child = -1;
- char reason[64] = {0};
-
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- if (!IA_ISREG (inode->ia_type))
- goto out;
-
- st_child = afr_stale_child_up (local, this);
- if (st_child < 0)
- goto out;
-
- sh->do_data_self_heal = _gf_true;
- sh->do_metadata_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_missing_entry_self_heal = _gf_true;
-
- snprintf (reason, sizeof (reason), "stale subvolume %d detected",
- st_child);
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-out:
- return;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
+
int
afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = frame->local;
- afr_private_t *priv = NULL;
- priv = this->private;
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
local->fd, xdata);
return 0;
@@ -134,49 +78,38 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
fd_t *fd, dict_t *xdata)
{
afr_local_t * local = NULL;
- int ret = 0;
int call_count = -1;
int child_index = (long) cookie;
- afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- priv = this->private;
local = frame->local;
+ fd_ctx = local->fd_ctx;
LOCK (&frame->lock);
{
if (op_ret == -1) {
local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
}
}
-unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
+ if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
STACK_WIND (frame, afr_open_ftruncate_cbk,
this, this->fops->ftruncate,
fd, 0, NULL);
} else {
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd, xdata);
+ local->op_errno, local->fd,
+ local->xdata_rsp);
}
}
@@ -190,16 +123,11 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
int i = 0;
- int ret = -1;
int32_t call_count = 0;
int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
- //We can't let truncation to happen outside transaction.
+ afr_fd_ctx_t *fd_ctx = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ //We can't let truncation to happen outside transaction.
priv = this->private;
@@ -207,44 +135,38 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
QUORUM_CHECK(open,out);
}
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- gf_log (this->name, GF_LOG_WARNING,
- "failed to open as split brain seen, returning EIO");
- op_errno = EIO;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_count = local->call_count;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
- local->cont.open.flags = flags;
+ call_count = local->call_count;
- local->fd = fd_ref (fd);
+ local->cont.open.flags = flags;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->open,
- loc, wind_flags, fd, xdata);
-
+ loc, (flags & ~O_TRUNC), fd, xdata);
if (!--call_count)
break;
}
}
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, NULL);
return 0;
}
@@ -273,12 +195,7 @@ afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index]->name);
}
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local->fd);
- goto out;
- }
+ fd_ctx = local->fd_ctx;
LOCK (&local->fd->lock);
{
@@ -289,7 +206,7 @@ afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
UNLOCK (&local->fd->lock);
-out:
+
call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_DESTROY (frame);
@@ -297,8 +214,42 @@ out:
return 0;
}
+
+/*
+ * afr_fd_ctx_need_open:
+ *
+ * Work out on which children @fd still has to be opened.
+ *
+ * For every child index below priv->child_count, need_open[i] is set to 1
+ * when the fd context records AFR_FD_NOT_OPENED for that child and
+ * priv->child_up[i] is set; otherwise need_open[i] is set to 0. Each
+ * selected child is switched to AFR_FD_OPENING while fd->lock is held.
+ *
+ * Returns the number of children selected. 0 is also returned, with
+ * @need_open left untouched, when afr_fd_ctx_get() yields no context.
+ */
+static int
+afr_fd_ctx_need_open (fd_t *fd, xlator_t *this, unsigned char *need_open)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return 0;
+
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ /* claim the child in the same critical section
+ that tested its state */
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return count;
+}
+
+
void
-afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
+afr_fix_open (fd_t *fd, xlator_t *this)
{
afr_private_t *priv = NULL;
int i = 0;
@@ -307,29 +258,31 @@ afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
int ret = -1;
int32_t op_errno = 0;
afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
priv = this->private;
- if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ if (!afr_is_fd_fixable (fd))
goto out;
fd_ctx = afr_fd_ctx_get (fd, this);
- if (!fd_ctx) {
- ret = -1;
+ if (!fd_ctx)
goto out;
- }
+
+ need_open = alloca0 (priv->child_count);
+
+ call_count = afr_fd_ctx_need_open (fd, this, need_open);
+ if (!call_count)
+ goto out;
frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
+ if (!frame)
goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT (frame, op_errno);
+ if (!local)
+ goto out;
local->loc.inode = inode_ref (fd->inode);
ret = loc_path (&local->loc, NULL);
@@ -337,10 +290,12 @@ afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
goto out;
local->fd = fd_ref (fd);
- local->call_count = need_open_count;
+ local->fd_ctx = fd_ctx;
+
+ local->call_count = call_count;
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
- need_open_count);
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
+ call_count);
for (i = 0; i < priv->child_count; i++) {
if (!need_open[i])
@@ -371,12 +326,12 @@ afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
local->fd, NULL);
}
+ if (!--call_count)
+ break;
}
- op_errno = 0;
- ret = 0;
+
+ return;
out:
- if (op_errno)
- ret = -1; //For handling ALLOC_OR_GOTO
- if (ret && frame)
+ if (frame)
AFR_STACK_DESTROY (frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 0000000..186f68c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,239 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+/*
+ * afr_read_txn_next_subvol:
+ *
+ * Pick the lowest-indexed child that is marked in local->readable[] and
+ * has not yet been marked in local->read_attempted[], mark it attempted
+ * and hand it to local->readfn. Unreadable children met before a
+ * candidate is found are marked attempted as they are skipped.
+ *
+ * When no candidate is left, local->readfn is still called, with
+ * subvol == -1.
+ *
+ * Always returns 0; the outcome is delivered through local->readfn.
+ */
+int
+afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->readable[i]) {
+ /* don't even bother trying here.
+ just mark as attempted and move on. */
+ local->read_attempted[i] = 1;
+ continue;
+ }
+
+ if (!local->read_attempted[i]) {
+ subvol = i;
+ break;
+ }
+ }
+
+ /* If no more subvols were available for reading, we leave
+ @subvol as -1, which is an indication we have run out of
+ readable subvols. */
+ if (subvol != -1)
+ local->read_attempted[subvol] = 1;
+ /* called unconditionally: readfn receives -1 when nothing is left */
+ local->readfn (frame, this, subvol);
+
+ return 0;
+}
+
+
+/*
+ * afr_read_txn_refresh_done:
+ *
+ * Completion callback handed to afr_inode_refresh() by the read
+ * transaction code in this file.
+ *
+ * - @err non-zero: op_ret is set to -1, op_errno to -@err, and
+ * local->readfn is called with subvol -1.
+ * NOTE(review): the negation suggests @err arrives as a negative
+ * errno -- confirm against afr_inode_refresh().
+ * - Otherwise readable[] and the event generation are re-read from the
+ * inode. If that fails or the generation is still 0, or if no read
+ * subvolume can be selected by policy, op_errno is set to EIO and
+ * local->readfn is called with subvol -1.
+ * - If the selected subvolume was already attempted, control passes to
+ * afr_read_txn_next_subvol(); otherwise it is marked attempted and
+ * passed to local->readfn.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_local_t *local = NULL;
+ int read_subvol = 0;
+ int event_generation = 0;
+ inode_t *inode = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ inode = local->inode;
+
+ if (err) {
+ local->op_errno = -err;
+ local->op_ret = -1;
+ read_subvol = -1;
+ goto readfn;
+ }
+
+ ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
+
+ if (ret == -1 || !event_generation) {
+ /* Even after refresh, we don't have a good
+ read subvolume. Time to bail */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ read_subvol = -1;
+ goto readfn;
+ }
+
+ read_subvol = afr_read_subvol_select_by_policy (inode, this,
+ local->readable);
+
+ if (read_subvol == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto readfn;
+ }
+
+ if (local->read_attempted[read_subvol]) {
+ /* the policy's choice was already tried before the refresh;
+ fall back to scanning for any untried readable subvol */
+ afr_read_txn_next_subvol (frame, this);
+ return 0;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+readfn:
+ local->readfn (frame, this, read_subvol);
+
+ return 0;
+}
+
+
+/*
+ * afr_read_txn_continue:
+ *
+ * Advance the read transaction running on @frame.
+ *
+ * The first call (local->refreshed not yet set) sets the flag and starts
+ * afr_inode_refresh() with afr_read_txn_refresh_done() as its callback.
+ * Every later call goes straight to afr_read_txn_next_subvol().
+ *
+ * @subvol is accepted but not used by this function.
+ * NOTE(review): presumably invoked by a readfn after a failed read
+ * attempt -- confirm against the callers.
+ *
+ * Always returns 0.
+ */
+int
+afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local->refreshed) {
+ /* refresh the inode ctx at most once per transaction */
+ local->refreshed = _gf_true;
+ afr_inode_refresh (frame, this, local->inode,
+ afr_read_txn_refresh_done);
+ } else {
+ afr_read_txn_next_subvol (frame, this);
+ }
+
+ return 0;
+}
+
+
+/* afr_read_txn_wipe:
+
+ clean internal variables in @local in order to make
+ it possible to call afr_read_txn() multiple times from
+ the same frame
+
+ Specifically: local->readfn is reset to NULL, the reference held
+ on local->inode (if any) is dropped and the pointer cleared, and
+ read_attempted[] / readable[] are zeroed for every child index
+ below priv->child_count.
+*/
+
+void
+afr_read_txn_wipe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->readfn = NULL;
+
+ if (local->inode) {
+ inode_unref (local->inode);
+ /* the reference is gone; do not leave a stale pointer
+ behind that a later cleanup could unref again */
+ local->inode = NULL;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->read_attempted[i] = 0;
+ local->readable[i] = 0;
+ }
+}
+
+
+/*
+ afr_read_txn:
+
+ This is the read transaction function. The way it works:
+
+ - Determine read-subvolume from inode ctx.
+
+ - If read-subvolume's generation was stale, refresh ctx once by
+ calling afr_inode_refresh()
+
+ Else make an attempt to read on read-subvolume.
+
+ - If attempted read on read-subvolume fails, refresh ctx once
+ by calling afr_inode_refresh()
+
+ - After ctx refresh, query read-subvolume freshly and attempt
+ read once.
+
+ - If read fails, try every other readable[] subvolume before
+ finally giving up. readable[] elements are set by afr_inode_refresh()
+ based on dirty and pending flags.
+
+ - If file is in split brain in the backend, generation will be
+ kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+ all elements. Therefore reads always fail.
+
+ @readfn is stored in local->readfn and is called with the chosen
+ subvolume index, either directly from here or later from
+ afr_read_txn_refresh_done(). @type is stored in
+ local->transaction.type and selects which read-subvol bits are
+ queried. A reference on @inode is kept in local->inode.
+
+ Always returns 0; results are delivered through @readfn.
+*/
+
+int
+afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int read_subvol = -1;
+ int event_generation = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_read_txn_wipe (frame, this);
+
+ local->readfn = readfn;
+ local->inode = inode_ref (inode);
+
+ local->transaction.type = type;
+ ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
+ &event_generation, type);
+ if (ret == -1)
+ /* very first transaction on this inode */
+ goto refresh;
+
+ if (local->event_generation != event_generation)
+ /* servers have disconnected / reconnected, and possibly
+ rebooted, very likely changing the state of freshness
+ of copies */
+ goto refresh;
+
+ read_subvol = afr_read_subvol_select_by_policy (inode, this,
+ local->readable);
+
+ /* valid indices are 0 .. child_count - 1; child_count itself
+ would index one past the per-child arrays used below */
+ if (read_subvol < 0 || read_subvol >= priv->child_count) {
+ gf_log (this->name, GF_LOG_WARNING, "Unreadable subvolume %d "
+ "found with event generation %d", read_subvol,
+ event_generation);
+ goto refresh;
+ }
+
+ if (!local->child_up[read_subvol]) {
+ /* should never happen, just in case */
+ gf_log (this->name, GF_LOG_WARNING, "subvolume %d is the "
+ "read subvolume in this generation, but is not up",
+ read_subvol);
+ goto refresh;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+
+ local->readfn (frame, this, read_subvol);
+
+ return 0;
+
+refresh:
+ afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index 83846f1..0000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,837 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#include <openssl/md5.h>
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno);
-static int
-sh_destroy_frame (call_frame_t *frame, xlator_t *this)
-{
- if (!frame)
- goto out;
-
- AFR_STACK_DESTROY (frame);
-out:
- return 0;
-}
-
-static void
-sh_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
- GF_FREE (sh_priv);
-}
-
-static int
-sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *last_loop_frame)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- if (sh_priv) {
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
- }
-
- sh_private_cleanup (sh_frame, this);
- if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
- GF_ASSERT (!last_loop_frame);
- //loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_DEBUG,
- "self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (sh_frame, this);
- } else {
- GF_ASSERT (last_loop_frame);
- if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
- "completed on %s",local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
- }
-
- sh->old_loop_frame = last_loop_frame;
- local->self_heal.algo_completion_cbk (sh_frame, this);
- }
-
- return 0;
-}
-
-int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- if (!loop_frame)
- goto out;
-
- loop_local = loop_frame->local;
- if (loop_local) {
- loop_sh = &loop_local->self_heal;
- }
-
- if (loop_sh && loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this, this->name,
- sh_destroy_frame);
- } else {
- sh_destroy_frame (loop_frame, this);
- }
-out:
- return 0;
-}
-
-static int
-sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- loop_sh->data_lock_held = _gf_true;
- loop_sh->sh_data_algo_start (loop_frame, this);
- return 0;
-}
-
-static int
-sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
-{
- call_frame_t *sh_frame = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
- sh_frame = loop_sh->sh_frame;
-
- gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
- sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
- return 0;
-}
-
-static int
-sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *old_loop_frame, call_frame_t **loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- afr_private_t *priv = NULL;
-
- GF_ASSERT (sh_frame);
- GF_ASSERT (loop_frame);
-
- *loop_frame = NULL;
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- new_loop_frame = copy_frame (sh_frame);
- if (!new_loop_frame)
- goto out;
- //We want the frame to have same lk_owner as sh_frame
- //so that locks translator allows conflicting locks
- new_loop_local = afr_self_heal_local_init (local, this);
- if (!new_loop_local)
- goto out;
- new_loop_frame->local = new_loop_local;
-
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->sources = memdup (sh->sources,
- priv->child_count * sizeof (*sh->sources));
- if (!new_loop_sh->sources)
- goto out;
- new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*new_loop_sh->write_needed),
- gf_afr_mt_char);
- if (!new_loop_sh->write_needed)
- goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
- gf_afr_mt_uint8_t);
- if (!new_loop_sh->checksum)
- goto out;
- new_loop_sh->inode = inode_ref (sh->inode);
- new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
- new_loop_sh->source = sh->source;
- new_loop_sh->active_sinks = sh->active_sinks;
- new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
- new_loop_sh->file_has_holes = sh->file_has_holes;
- new_loop_sh->old_loop_frame = old_loop_frame;
- new_loop_sh->sh_frame = sh_frame;
- *loop_frame = new_loop_frame;
- return 0;
-out:
- sh_destroy_frame (new_loop_frame, this);
- return -ENOMEM;
-}
-
-static int
-sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
- call_frame_t *old_loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- int ret = 0;
-
- GF_ASSERT (sh_frame);
-
- local = sh_frame->local;
- sh = &local->self_heal;
-
- ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
- &new_loop_frame);
- if (ret)
- goto out;
- new_loop_local = new_loop_frame->local;
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->offset = offset;
- new_loop_sh->block_size = sh->block_size;
- afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
- return 0;
-out:
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- if (old_loop_frame)
- sh_loop_finish (old_loop_frame, this);
- sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
- return 0;
-}
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- if (!is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((!sh->eof_reached) &&
- (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- sh_priv->offset += block_size;
- sh_priv->loops_running++;
-
- if (!is_first_call)
- break;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (0 == loop) {
- //loop finish does unlock, but the erasing of the pending
- //xattrs needs to happen before that so do not finish the loop
- if (is_driver_done &&
- !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
- goto driver_done;
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- }
-
- //If we have more loops to form we should finish previous loop after
- //the next loop lock
- while (loop--) {
- if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
- // op failed in other loop, stop spawning more loops
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- sh_loop_driver (sh_frame, this, _gf_false, NULL);
- } else {
- gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
- "for offset %"PRId64, offset);
-
- sh_loop_start (sh_frame, this, offset, old_loop_frame);
- old_loop_frame = NULL;
- offset += block_size;
- }
- }
-
-driver_done:
- if (is_driver_done) {
- sh_loop_driver_done (sh_frame, this, old_loop_frame);
- }
- return 0;
-}
-
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (loop_frame) {
- loop_local = loop_frame->local;
- if (loop_local)
- loop_sh = &loop_local->self_heal;
- if (loop_sh)
- gf_log (this->name, GF_LOG_TRACE, "loop for offset "
- "%"PRId64" returned", loop_sh->offset);
- }
-
- if (op_ret == -1) {
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- afr_sh_set_error (sh, op_errno);
- if (loop_frame) {
- sh_loop_finish (loop_frame, this);
- loop_frame = NULL;
- }
- }
-
- sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
-
- return 0;
-}
-
-static int
-sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index, loop_sh->offset);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- afr_sh_set_error (loop_sh, op_errno);
- } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
- gf_log (this->name, GF_LOG_ERROR,
- "incomplete write to %s on subvolume %s "
- "(expected %lu, returned %d)", sh_local->loc.path,
- priv->children[child_index]->name,
- loop_local->cont.writev.vector->iov_len, op_ret);
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- iobref_unref(loop_local->cont.writev.iobref);
-
- sh_loop_return (sh_frame, this, loop_frame,
- loop_sh->op_ret, loop_sh->op_errno);
- }
-
- return 0;
-}
-
-static void
-sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
- afr_private_t *priv)
-{
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (!strcmp (sh->algo->name, "diff"))
- return;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- /* full self-heal guarantees there exists atleast 1 file with size 0
- * That means for other files we can preserve holes that come after
- * its size before 'trim'
- */
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->write_needed[i] &&
- ((loop_sh->offset + 1) > sh->buf[i].ia_size))
- loop_sh->write_needed[i] = 0;
- }
-}
-
-static int
-sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- int i = 0;
- int call_count = 0;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, loop_local->loc.path, loop_sh->offset);
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
- "for %s reason :%s", sh->source,
- sh_local->loc.path, strerror (errno));
- } else {
- sh->eof_reached = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
- sh_local->loc.path);
- }
- sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
- goto out;
- }
-
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
- sh_prune_writes_needed (sh_frame, loop_frame, priv);
-
- call_count = sh_number_of_writes_needed (loop_sh->write_needed,
- priv->child_count);
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame, 0, 0);
- goto out;
- }
-
- loop_local->call_count = call_count;
-
- /*
- * We only really need the request size at the moment, but the buffer
- * is required if we want to issue a retry in the event of a short write.
- * Therefore, we duplicate the vector and ref the iobref here...
- */
- loop_local->cont.writev.vector = iov_dup(vector, count);
- loop_local->cont.writev.iobref = iobref_ref(iobref);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!loop_sh->write_needed[i])
- continue;
- STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- loop_sh->healing_fd, vector, count,
- loop_sh->offset, 0, iobref, NULL);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->readv,
- loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset, 0, NULL);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = (long) cookie;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- } else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
- strong_checksum, MD5_DIGEST_LENGTH);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
- MD5_DIGEST_LENGTH)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_sh->offset);
-
- write_needed = loop_sh->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed &&
- !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
- sh_loop_read (loop_frame, this);
- } else {
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- }
- }
-
- return 0;
-}
-
-static int
-sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- call_count = loop_sh->active_sinks + 1; /* sinks and source */
-
- loop_local->call_count = call_count;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-static int
-sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
- loop_sh->write_needed[i] = 1;
- }
- sh_loop_read (loop_frame, this);
- return 0;
-}
-
-afr_sh_algo_private_t*
-afr_sh_priv_init ()
-{
- afr_sh_algo_private_t *sh_priv = NULL;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-out:
- return sh_priv;
-}
-
-int
-afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_self_heal_t *dst_sh = NULL;
- afr_local_t *src_local = NULL;
- afr_self_heal_t *src_sh = NULL;
- int ret = -1;
-
- dst_local = dst->local;
- dst_sh = &dst_local->self_heal;
- src_local = src->local;
- src_sh = &src_local->self_heal;
- GF_ASSERT (src_sh->data_lock_held);
- GF_ASSERT (!dst_sh->data_lock_held);
- ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
- if (ret)
- return ret;
- src_sh->data_lock_held = _gf_false;
- dst_sh->data_lock_held = _gf_true;
- return 0;
-}
-
-int
-afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
- afr_sh_algo_fn sh_data_algo_start)
-{
- call_frame_t *first_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = 0;
- afr_private_t *priv = NULL;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- sh->sh_data_algo_start = sh_data_algo_start;
- local->call_count = 0;
- ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
- if (ret)
- goto out;
- ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
- priv->child_count);
- if (ret)
- goto out;
- sh->private = afr_sh_priv_init ();
- if (!sh->private) {
- ret = -1;
- goto out;
- }
- sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
- ret = 0;
-out:
- if (ret) {
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- sh_loop_driver_done (sh_frame, this, NULL);
- }
- return 0;
-}
-
-int
-afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
- return 0;
-}
-
-int
-afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
- return 0;
-}
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 6b20789..0000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-} afr_sh_algo_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index ef92b42..4dac831 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,2805 +8,1002 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-#include "pump.h"
-
-#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \
- do { \
- if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \
- off += snprintf (msg + off, sizeof (msg) - off, \
- " "sh_str" self heal %s,", \
- get_sh_completion_status (status));\
- print_log = 1; \
- } \
- } while (0)
-
-#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \
- do { \
- if (AFR_SELF_HEAL_SYNC_BEGIN == status || \
- AFR_SELF_HEAL_FAILED == status) { \
- off += snprintf (msg + off, sizeof (msg) - off, \
- " "sh_str" self heal %s,", \
- get_sh_completion_status (status));\
- print_log = 1; \
- } \
- } while (0)
+#include "byte-order.h"
-void
-afr_sh_reset (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- memset (sh->child_errno, 0,
- sizeof (*sh->child_errno) * priv->child_count);
- memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
- memset (sh->parentbufs, 0,
- sizeof (*sh->parentbufs) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
- memset (sh->locked_nodes, 0,
- sizeof (*sh->locked_nodes) * priv->child_count);
- sh->active_sinks = 0;
-
- afr_reset_xattr (sh->xattr, priv->child_count);
-}
+ afr_local_t *local = NULL;
-//Intersection[child]=1 if child is part of intersection
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count)
-{
- int i = 0;
-
- memset (intersection, 0, sizeof (*intersection) * child_count);
- for (i = 0; i < child_count; i++) {
- intersection[i] = afr_is_child_present (set1, child_count, i)
- && afr_is_child_present (set2, child_count,
- i);
- }
+ local = frame->local;
+
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
}
-/**
- * select_source - select a source and return it
- */
int
-afr_sh_select_source (int sources[], int child_count)
+afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr)
{
- int i = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {0, };
- return -1;
-}
+ priv = this->private;
+ local = frame->local;
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
-{
- int i = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
- sh->success[i] = 1;
- }
- }
- sh->active_sinks = active_sinks;
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-int
-afr_sh_source_count (int sources[], int child_count)
-{
- int i = 0;
- int nsource = 0;
+ STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
-}
+ syncbarrier_wait (&local->barrier, 1);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
-{
- sh->op_ret = -1;
- sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
- _gf_false);
+ return 0;
}
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
-{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
-
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
-
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
- }
-
- GF_FREE (buf);
-}
-char*
-afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+dict_t *
+afr_selfheal_output_xattr (xlator_t *this, afr_transaction_type type,
+ int *output_dirty, int **output_matrix, int subvol)
{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
- int child_count = priv->child_count;
- char *matrix_begin = "[ [ ";
- char *matrix_end = "] ]";
- char *seperator = "] [ ";
- int pending_entry_strlen = 12; //Including space after entry
- int matrix_begin_strlen = 0;
- int matrix_end_strlen = 0;
- int seperator_strlen = 0;
- int string_length = 0;
- char *msg = "- Pending matrix: ";
-
- /*
- * for a list of lists of [ [ a b ] [ c d ] ]
- * */
-
- matrix_begin_strlen = strlen (matrix_begin);
- matrix_end_strlen = strlen (matrix_end);
- seperator_strlen = strlen (seperator);
- string_length = matrix_begin_strlen + matrix_end_strlen
- + (child_count -1) * seperator_strlen
- + (child_count * child_count * pending_entry_strlen);
-
- buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char);
- if (!buf)
- goto out;
-
- ptr = buf;
- ptr += sprintf (ptr, "%s", msg);
- ptr += sprintf (ptr, "%s", matrix_begin);
- for (i = 0; i < priv->child_count; i++) {
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- if (i < priv->child_count -1)
- ptr += sprintf (ptr, "%s", seperator);
- }
-
- ptr += sprintf (ptr, "%s", matrix_end);
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ int j = 0;
+ int idx = 0;
+ int ret = 0;
+ int *raw = 0;
-out:
- return buf;
-}
+ priv = this->private;
+ idx = afr_index_for_transaction_type (type);
-void
-afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
- const char *loc)
-{
- char *buf = NULL;
- char *free_ptr = NULL;
+ xattr = dict_new ();
+ if (!xattr)
+ return NULL;
- buf = afr_get_pending_matrix_str (pending_matrix, this);
- if (buf)
- free_ptr = buf;
- else
- buf = "";
+ if (output_dirty[subvol]) {
+ /* clear dirty */
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+ raw[idx] = hton32 (output_dirty[subvol]);
+ ret = dict_set_bin (xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto err;
+ }
- gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
- " (possible split-brain). Please delete the file from all but "
- "the preferred subvolume.%s", loc, buf);
- GF_FREE (free_ptr);
- return;
-}
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ if (!output_matrix[subvol][j])
+ continue;
+ raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS,
+ gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
-void
-afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
-{
- int i = 0;
- int j = 0;
+ raw[idx] = hton32 (output_matrix[subvol][j]);
- GF_ASSERT (pending_matrix);
+ ret = dict_set_bin (xattr, priv->pending_key[j],
+ raw, sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto err;
+ }
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
+ return xattr;
+err:
+ if (xattr)
+ dict_unref (xattr);
+ return NULL;
}
-void
-afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- size_t child_count)
-{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
- GF_ASSERT (ignorant_subvols);
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
-}
int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
-
- afr_init_pending_matrix (pending_matrix, child_count);
-
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- if (ignorant_subvols)
- ignorant_subvols[i] = 1;
- continue;
- }
-
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
-
- return ret;
-}
+afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, afr_transaction_type type,
+ struct afr_reply *replies, unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ pending = alloca0 (priv->child_count);
+
+ input_dirty = alloca0 (priv->child_count * sizeof (int));
+ input_matrix = ALLOC_MATRIX (priv->child_count, int);
+ output_dirty = alloca0 (priv->child_count * sizeof (int));
+ output_matrix = ALLOC_MATRIX (priv->child_count, int);
+
+ afr_selfheal_extract_xattr (this, replies, type, input_dirty,
+ input_matrix);
+
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j])
+ output_matrix[i][j] = 1;
+ else
+ output_matrix[i][j] = -input_matrix[i][j];
+ }
+ }
-typedef enum {
- AFR_NODE_INVALID,
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE,
-} afr_node_type;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+ xattr = afr_selfheal_output_xattr (this, type, output_dirty,
+ output_matrix, i);
+ if (!xattr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to allocate xdata for subvol %d", i);
+ continue;
+ }
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
-{
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
+ afr_selfheal_post_op (frame, this, inode, i, xattr);
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
- }
+ dict_unref (xattr);
+ }
- return ret;
+ return 0;
}
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
-{
- return array[i]; /* fool if accuses itself */
+void
+afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count)
+{
+ int i = 0;
+ dict_t *xdata = NULL;
+
+ if (dst == src)
+ return;
+
+ for (i = 0; i < count; i++) {
+ dst[i].valid = src[i].valid;
+ dst[i].op_ret = src[i].op_ret;
+ dst[i].op_errno = src[i].op_errno;
+ dst[i].prestat = src[i].prestat;
+ dst[i].poststat = src[i].poststat;
+ dst[i].preparent = src[i].preparent;
+ dst[i].postparent = src[i].postparent;
+ dst[i].preparent2 = src[i].preparent2;
+ dst[i].postparent2 = src[i].postparent2;
+ if (src[i].xdata)
+ xdata = dict_ref (src[i].xdata);
+ else
+ xdata = NULL;
+ if (dst[i].xdata)
+ dict_unref (dst[i].xdata);
+ dst[i].xdata = xdata;
+ memcpy (dst[i].checksum, src[i].checksum,
+ MD5_DIGEST_LENGTH);
+ }
}
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
+int
+afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol,
+ int idx, dict_t *xdata)
{
- return !array[i]; /* wise if does not accuse itself */
-}
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw))
+ return -1;
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
-{
- int i = 0;
- int ret = 1;
+ if (!pending_raw)
+ return -1;
+
+ memcpy (pending, pending_raw, sizeof(pending));
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
+ dirty[subvol] = ntoh32 (pending[idx]);
- return ret;
+ return 0;
}
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
+int
+afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol,
+ int idx, dict_t *xdata)
{
- int i = 0;
- int ret = 0;
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {0, };
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
- }
- }
+ priv = this->private;
- return ret;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
-/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
- *
- * If no nodes with wisdom 1 exist, a split-brain has occurred.
- */
+ memcpy (pending, pending_raw, sizeof(pending));
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
-
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
-
- characters[i].wisdom = 0;
- }
- }
- }
- }
+ matrix[subvol][i] = ntoh32 (pending[idx]);
+ }
+
+ return 0;
}
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
+int
+afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- int i = 0;
- int ret = 1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
+
+ idx = afr_index_for_transaction_type (type);
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
+ priv = this->private;
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].xdata)
+ continue;
+
+ xdata = replies[i].xdata;
- return ret;
+ afr_selfheal_fill_dirty (this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
+ }
+
+ return 0;
}
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
-{
- int nsources = 0;
- int i = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
- sources[i] = 1;
- nsources++;
- }
- }
+/*
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
+ *
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
+ *
+ * The output of the function is by filling the arrays sources[] and sinks[].
+ *
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
+ *
+ * sinks[i] is set if i'th server needs to be healed.
+ *
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
+ *
+ */
- return nsources;
-}
+int
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL;
+ int **matrix = NULL;
+ char *accused = NULL;
+
+ priv = this->private;
+
+ dirty = alloca0 (priv->child_count * sizeof (int));
+ accused = alloca0 (priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
+
+ /* Next short list all accused to exclude them from being sources */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ accused[j] = 1;
+ }
+ }
-static void
-afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int j = 0;
- int witness = 0;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (pending_matrix);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- witness = 0;
- for (j = 0; j < child_count; j++) {
- if (i == j)
- continue;
- witness += pending_matrix[i][j];
- }
- witnesses[i] = witness;
- }
-}
+ /* Short list all non-accused as sources */
+ memset (sources, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ }
-static int32_t
-afr_find_biggest_witness_among_fools (int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int biggest_witness = -1;
- int biggest_witness_idx = -1;
- int biggest_witness_cnt = -1;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- if (biggest_witness < witnesses[i]) {
- biggest_witness = witnesses[i];
- biggest_witness_idx = i;
- biggest_witness_cnt = 1;
+ /* Everyone accused by sources are sinks */
+ memset (sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
}
+ }
- if (biggest_witness == witnesses[i])
- biggest_witness_cnt++;
- }
+ /* If any source has 'dirty' bit, pick first
+ 'dirty' source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && dirty[i]) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (j != i) {
+ sources[j] = 0;
+ sinks[j] = 1;
+ }
+ }
+ break;
+ }
+ }
- if (biggest_witness_cnt != 1)
- return -1;
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT (sources, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ sinks[i] = 1;
+ }
+ }
- return biggest_witness_idx;
+ return 0;
}
+
int
-afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count, int32_t witness)
+afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
{
- int i = 0;
- int nsources = 0;
-
- GF_ASSERT (sources);
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- if (witness == witnesses[i]) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
-}
+ afr_local_t *local = NULL;
+ int i = -1;
+ local = frame->local;
+ i = (long) cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata)
+ local->replies[i].xdata = dict_ref (xdata);
+
+ syncbarrier_wake (&local->barrier);
-int
-afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
-{
- if (idx >= 0 && idx < child_count) {
- sources[idx] = 1;
- return 1;
- }
return 0;
}
-static int
-afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
- int child_count)
+inode_t *
+afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on)
+ /* Named lookup helper: allocates a new inode in parent's inode table,
+ * builds a loc for <parent, name>, hands it to AFR_ONLIST together with
+ * lookup_on and afr_selfheal_discover_cbk, then copies local->replies
+ * into the caller-supplied replies array.
+ * Returns the new inode, or NULL if the request dict or the inode could
+ * not be set up. No lock calls are made in this function. */
{
- int idx = -1;
- int i = -1;
- int child = -1;
- uint64_t max_size = 0;
- uint64_t min_size = 0;
- int num_children = 0;
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
+ local = frame->local;
+ priv = frame->this->private;
- child = success_children[i];
- if (bufs[child].ia_size > max_size) {
- max_size = bufs[child].ia_size;
- idx = child;
- }
-
- if ((num_children == 0) || (bufs[child].ia_size < min_size)) {
- min_size = bufs[child].ia_size;
- }
+ xattr_req = dict_new (); /* request dict sent along with the lookup */
+ if (!xattr_req)
+ return NULL;
- num_children++;
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { /* any non-zero result is treated as failure */
+ dict_destroy (xattr_req); /* NOTE(review): error paths use dict_destroy() while the success path uses dict_unref() - confirm the asymmetry is intended */
+ return NULL;
}
- /* If sizes are same for all of them, finding sources will have to
- * happen with pending changelog. So return -1
- */
- if ((num_children > 1) && (min_size == max_size))
- return -1;
- return idx;
-}
+ inode = inode_new (parent->table); /* fresh inode for the entry being looked up */
+ if (!inode) {
+ dict_destroy (xattr_req);
+ return NULL;
+ }
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+ loc.name = name; /* pointer is borrowed from the caller, not duplicated */
+ loc.inode = inode_ref (inode); /* loc takes its own ref; presumably released by loc_wipe() below - confirm */
-static int
-afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
- int child_count)
-{
- int idx = -1;
- int i = -1;
- int child = -1;
- uint64_t max_ctime = 0;
+ AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req); /* NOTE(review): AFR_ONLIST is defined elsewhere; presumably winds the lookup to each child flagged in lookup_on and waits for the callbacks - confirm */
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
+ afr_replies_copy (replies, local->replies, priv->child_count); /* hand the per-child results back to the caller */
- child = success_children[i];
- if (bufs[child].ia_ctime > max_ctime) {
- max_ctime = bufs[child].ia_ctime;
- idx = child;
- }
- }
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
- return idx;
+ return inode; /* returned whatever the individual replies say; callers inspect replies[] */
}
-static int
-afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t *success_children,
- int child_count, struct iatt *bufs)
-{
- int32_t biggest_witness = 0;
- int nsources = 0;
- int32_t *witnesses = NULL;
-
- GF_ASSERT (child_count > 0);
-
- biggest_witness = afr_find_largest_file_size (bufs, success_children,
- child_count);
- if (biggest_witness != -1)
- goto found;
-
- witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
- gf_afr_mt_int32_t);
- if (NULL == witnesses) {
- nsources = -1;
- goto out;
- }
-
- afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
- child_count);
- biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
- characters,
- child_count);
- if (biggest_witness != -1)
- goto found;
-
- biggest_witness = afr_find_newest_file (bufs, success_children,
- child_count);
-
-found:
- nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
- biggest_witness);
-out:
- GF_FREE (witnesses);
- return nsources;
-}
-
int
-afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count, uint32_t uid)
+afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on)
+ /* Gfid-based counterpart of afr_selfheal_unlocked_lookup_on: builds a
+ * loc from the given inode and gfid (no parent or name), hands it to
+ * AFR_ONLIST together with discover_on and afr_selfheal_discover_cbk,
+ * and copies local->replies into replies.
+ * Returns 0 once the replies are copied, or -ENOMEM if the request
+ * dict cannot be created or prepared. */
{
- int i = 0;
- int nsources = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
-
- child = success_children[i];
- if (uid == bufs[child].ia_uid) {
- sources[child] = 1;
- nsources++;
- }
- }
- return nsources;
-}
+ loc_t loc = {0, };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
-int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- int smallest = -1;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
- child = success_children[i];
- if ((smallest == -1) ||
- (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
- smallest = child;
- }
- }
- return smallest;
-}
+ local = frame->local;
+ priv = frame->this->private;
-static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int smallest = 0;
-
- smallest = afr_get_child_with_lowest_uid (bufs, success_children,
- child_count);
- if (smallest < 0) {
- nsources = -1;
- goto out;
- }
- nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- success_children, child_count,
- bufs[smallest].ia_uid);
-out:
- return nsources;
-}
+ xattr_req = dict_new (); /* request dict sent along with the lookup */
+ if (!xattr_req)
+ return -ENOMEM;
-int
-afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
- struct iatt *bufs)
-{
- afr_private_t *priv = NULL;
- int i = 0;
- int child = -1;
- int read_child = -1;
-
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (read_child < 0)
- read_child = child;
- else if (bufs[read_child].ia_size < bufs[child].ia_size)
- read_child = child;
- }
- return read_child;
-}
+ if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { /* every prepare failure is reported as -ENOMEM */
+ dict_destroy (xattr_req); /* NOTE(review): dict_destroy() here versus dict_unref() on the success path - confirm the asymmetry is intended */
+ return -ENOMEM;
+ }
-int
-afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int i = 0;
- int child = 0;
- gf_boolean_t sink_exists = _gf_false;
- gf_boolean_t source_exists = _gf_false;
- int source = -1;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (!bufs[child].ia_size) {
- sink_exists = _gf_true;
- continue;
- }
- if (!source_exists) {
- source_exists = _gf_true;
- source = child;
- continue;
- }
- if (bufs[source].ia_size != bufs[child].ia_size) {
- nsources = -1;
- goto out;
- }
- }
- if (!source_exists && !sink_exists) {
- nsources = -1;
- goto out;
- }
-
- if (!source_exists || !sink_exists)
- goto out;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (bufs[child].ia_size) {
- sources[child] = 1;
- nsources++;
- }
- }
-out:
- return nsources;
-}
+ loc.inode = inode_ref (inode); /* loc takes its own ref; presumably released by loc_wipe() below - confirm */
+ uuid_copy (loc.gfid, gfid); /* the loc is identified by gfid only; parent and name stay zeroed */
-char *
-afr_get_character_str (afr_node_type type)
-{
- char *character = NULL;
-
- switch (type) {
- case AFR_NODE_INNOCENT:
- character = "innocent";
- break;
- case AFR_NODE_FOOL:
- character = "fool";
- break;
- case AFR_NODE_WISE:
- character = "wise";
- break;
- default:
- character = "invalid";
- break;
- }
- return character;
-}
+ AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req); /* NOTE(review): AFR_ONLIST is defined elsewhere; presumably winds the lookup to each child flagged in discover_on and waits for the callbacks - confirm */
-afr_node_type
-afr_find_child_character_type (int32_t *pending_row, int32_t child,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
+ afr_replies_copy (replies, local->replies, priv->child_count); /* hand the per-child results back to the caller */
- GF_ASSERT ((child >= 0) && (child < child_count));
+ loc_wipe (&loc);
+ dict_unref (xattr_req);
- if (afr_sh_is_innocent (pending_row, child_count))
- type = AFR_NODE_INNOCENT;
- else if (afr_sh_is_fool (pending_row, child, child_count))
- type = AFR_NODE_FOOL;
- else if (afr_sh_is_wise (pending_row, child, child_count))
- type = AFR_NODE_WISE;
- return type;
+ return 0; /* 0 regardless of the individual replies; callers inspect replies[] */
}
int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type,
- int32_t *subvol_status, gf_boolean_t ignore_ignorant)
+afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies)
+ /* Convenience wrapper: calls afr_selfheal_unlocked_discover_on with
+ * priv->child_up as the child list and returns its result unchanged. */
{
- afr_private_t *priv = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
- int nsources = -1;
- unsigned char *ignorant_subvols = NULL;
- unsigned int child_count = 0;
-
- priv = this->private;
- child_count = priv->child_count;
-
- if (afr_get_children_count (success_children, priv->child_count) == 0)
- goto out;
-
- if (!ignore_ignorant) {
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
- child_count, gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
- }
-
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- ignorant_subvols, xattr, type,
- priv->child_count);
-
- if (!ignore_ignorant)
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- priv->child_count);
- sh_type = afr_self_heal_type_for_transaction (type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
-
- afr_sh_print_pending_matrix (pending_matrix, this);
-
- nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
- sh_type, success_children, subvol_status);
-out:
- GF_FREE (ignorant_subvols);
- return nsources;
-}
+ afr_private_t *priv = NULL;
-void
-afr_find_character_types (afr_node_character *characters,
- int32_t **pending_matrix, int32_t *success_children,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
- int child = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- type = afr_find_child_character_type (pending_matrix[child],
- child, child_count);
- characters[child].type = type;
- }
-}
+ priv = frame->this->private; /* the xlator is taken from the frame, not passed in */
-void
-afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- sources[success_children[i]] = 1;
- }
+ return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies,
+ priv->child_up); /* presumably the per-child "up" flags - confirm against afr_private_t */
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
+
int
-afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
- struct iatt *bufs, afr_self_heal_type type,
- int32_t *success_children, int32_t *subvol_status)
+afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+ /* Callback used by the inodelk helpers in this file: stores the result
+ * of one lock/unlock request in local->replies[] at the index carried
+ * in cookie, then calls syncbarrier_wake on local->barrier.
+ * xdata is not used. Always returns 0. */
{
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *characters = NULL;
- int nsources = -1;
- unsigned int child_count = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- child_count = priv->child_count;
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count, gf_afr_mt_afr_node_character);
- if (!characters)
- goto out;
-
- this = THIS;
-
- /* start clean */
- memset (sources, 0, sizeof (*sources) * child_count);
- nsources = 0;
- afr_find_character_types (characters, pending_matrix, success_children,
- child_count);
- if (afr_sh_all_nodes_innocent (characters, child_count)) {
- switch (type) {
- case AFR_SELF_HEAL_METADATA:
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- success_children,
- child_count,
- sources);
- break;
- case AFR_SELF_HEAL_DATA:
- nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
- success_children,
- child_count,
- sources);
- if ((nsources < 0) && subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- break;
- default:
- break;
- }
- goto out;
- }
-
- if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
-
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- if (subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- nsources = -1;
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
- }
- } else {
- if (subvol_status)
- *subvol_status |= ALL_FOOLS;
- nsources = afr_mark_biggest_of_fools_as_source (sources,
- pending_matrix,
- characters,
- success_children,
- child_count, bufs);
- }
+ afr_local_t *local = NULL;
+ int i = 0;
-out:
- if (nsources == 0)
- afr_mark_success_children_sources (sources, success_children,
- child_count);
- GF_FREE (characters);
+ local = frame->local;
+ i = (long) cookie; /* cookie carries the reply slot index as an integer, not a pointer */
- gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
- return nsources;
-}
+ local->replies[i].valid = 1; /* marks the slot as answered; afr_selfheal_locked_fill tests this flag */
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type)
-{
- int tgt = 0;
- int src = 0;
- int value = 0;
-
- afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
- xattr, type, priv->child_count);
-
- /*
- * The algorithm here has two parts. First, for each subvol indexed
- * as tgt, we try to figure out what count everyone should have for it.
- * If the self-heal succeeded, that's easy; the value is zero.
- * Otherwise, the value is the maximum of the succeeding nodes' counts.
- * Once we know the value, we loop through (possibly for a second time)
- * setting each count to the difference so that when we're done all
- * succeeding nodes will have the same count for tgt.
- */
- for (tgt = 0; tgt < priv->child_count; ++tgt) {
- value = 0;
- if (!success[tgt]) {
- /* Find the maximum. */
- for (src = 0; src < priv->child_count; ++src) {
- if (!success[src]) {
- continue;
- }
- if (delta_matrix[src][tgt] > value) {
- value = delta_matrix[src][tgt];
- }
- }
- }
- /* Force everyone who succeeded to the chosen value. */
- for (src = 0; src < priv->child_count; ++src) {
- if (success[src]) {
- delta_matrix[src][tgt] = value
- - delta_matrix[src][tgt];
- }
- else {
- delta_matrix[src][tgt] = 0;
- }
- }
- }
+ syncbarrier_wake (&local->barrier); /* presumably releases the waiter inside the AFR_ONALL/AFR_ONLIST/AFR_SEQ macros - confirm */
+
+ return 0;
}
int
-afr_sh_delta_to_xattr (xlator_t *this,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
- int32_t *local_pending = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- local_pending = NULL;
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
-
- if (!pending) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate pending entry "
- "for %s[%d] on %s",
- priv->pending_key[j], type,
- priv->children[i]->name);
- continue;
- }
- /* 3 = data+metadata+entry */
-
- k = afr_index_for_transaction_type (type);
-
- pending[k] = hton32 (delta_matrix[i][j]);
-
- if (j == i) {
- local_pending = pending;
- continue;
- }
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- GF_FREE (pending);
- }
- }
- if (local_pending) {
- ret = dict_set_bin (xattr[i], priv->pending_key[i],
- local_pending,
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- GF_FREE (local_pending);
- }
- }
- }
- return 0;
+afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_on)
+{ /* Turns local->replies[] into a per-child flag array: locked_on[i] is
+ * set to 1 when reply i is valid and has op_ret == 0, otherwise to 0.
+ * Every entry in [0, priv->child_count) is written.
+ * Returns the number of entries set to 1. */
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) { /* unanswered slots and failed requests both count as not locked */
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
+ }
+ }
+
+ return count;
}
int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
+ /* Requests a write inodelk (F_WRLCK, start off, length size) in domain
+ * dom through AFR_ONALL using F_SETLK only; unlike
+ * afr_selfheal_inodelk there is no F_SETLKW retry.
+ * Fills locked_on via afr_selfheal_locked_fill and returns the count
+ * that helper reports. */
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_sh_reset (frame, this);
-
- if (local->unhealable) {
- gf_log (this->name, GF_LOG_DEBUG,
- "split brain found, aborting selfheal of %s",
- local->loc.path);
- }
-
- if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
-
- return 0;
-}
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ loc.inode = inode_ref (inode); /* loc takes its own ref; presumably released by loc_wipe() below - confirm */
+ uuid_copy (loc.gfid, inode->gfid);
-static int
-afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- local = frame->local;
- int_lock = &local->internal_lock;
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL); /* NOTE(review): AFR_ONALL is defined elsewhere; presumably sends the request to every child and waits for the callbacks - confirm */
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
+ loc_wipe (&loc);
- return 0;
+ return afr_selfheal_locked_fill (frame, this, locked_on); /* number of children whose reply is valid with op_ret == 0 */
}
+
int
-afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
-{
- int ret = -ENOMEM;
- sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
- gf_afr_mt_iatt);
- if (!sh->buf)
- goto out;
- sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
- gf_afr_mt_iatt);
- if (!sh->parentbufs)
- goto out;
- sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
- gf_afr_mt_int);
- if (!sh->child_errno)
- goto out;
- sh->success_children = afr_children_create (child_count);
- if (!sh->success_children)
- goto out;
- sh->fresh_children = afr_children_create (child_count);
- if (!sh->fresh_children)
- goto out;
- sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
- gf_afr_mt_dict_t);
- if (!sh->xattr)
- goto out;
- ret = 0;
-out:
- return ret;
-}
+afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
+{ /* Requests a write inodelk (F_WRLCK, start off, length size) in domain
+ * dom. First pass: AFR_ONALL with F_SETLK. If any reply has
+ * op_ret == -1 with op_errno == EAGAIN, the locks obtained so far are
+ * released through afr_selfheal_uninodelk and the request is re-issued
+ * through AFR_SEQ with F_SETLKW.
+ * Fills locked_on via afr_selfheal_locked_fill and returns the count
+ * that helper reports. */
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ loc.inode = inode_ref (inode); /* loc takes its own ref; presumably released by loc_wipe() below - confirm */
+ uuid_copy (loc.gfid, inode->gfid);
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL); /* first attempt uses F_SETLK */
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) { /* NOTE(review): replies[i].valid is not tested here, unlike afr_selfheal_locked_fill - confirm replies[] is reset before AFR_ONALL */
+ afr_selfheal_locked_fill (frame, this, locked_on); /* return value ignored; only the locked_on flags are needed for the unlock */
+ afr_selfheal_uninodelk (frame, this, inode, dom, off,
+ size, locked_on); /* drop whatever the first pass acquired before retrying */
+
+ AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+ &loc, F_SETLKW, &flock, NULL); /* NOTE(review): AFR_SEQ is defined elsewhere; presumably issues the F_SETLKW request to the children one at a time - confirm */
+ break; /* one retry covers all children, so stop at the first EAGAIN */
+ }
+ }
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- sh->buf[child_index] = *buf;
- sh->parentbufs[child_index] = *postparent;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
- " %s => -1 (%s)", loc->path,
- priv->children[child_index]->name,
- strerror (op_errno));
- local->self_heal.child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
- return;
-}
+ loc_wipe (&loc);
-gf_boolean_t
-afr_valid_ia_type (ia_type_t ia_type)
-{
- switch (ia_type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- return _gf_true;
- default:
- return _gf_false;
- }
- return _gf_false;
+ return afr_selfheal_locked_fill (frame, this, locked_on); /* recomputed from the most recent replies (the retry's, if one happened) */
}
+
int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, call_frame_t **impunge_frame)
+afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
+ /* Sends an F_UNLCK inodelk for the range (start off, length size) in
+ * domain dom through AFR_ONLIST, using locked_on as the child list.
+ * The unlock replies are not inspected here; always returns 0. */
{
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- call_frame_t *new_frame = NULL;
-
- op_errno = ENOMEM;
- priv = this->private;
- new_frame = copy_frame (frame);
- if (!new_frame) {
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
-
- local = frame->local;
- new_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_source;
- impunge_local->child_up = memdup (local->child_up,
- sizeof (*local->child_up) *
- priv->child_count);
- if (!impunge_local->child_up)
- goto out;
-
- impunge_local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!impunge_local->pending)
- goto out;
-
- ret = afr_sh_common_create (impunge_sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- op_errno = 0;
- *impunge_frame = new_frame;
-out:
- if (op_errno && new_frame)
- AFR_STACK_DESTROY (new_frame);
- return -op_errno;
-}
+ loc_t loc = {0,};
+ struct gf_flock flock = {0, };
-void
-afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
- struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
-{
- call_frame_t *impunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int ret = 0;
- unsigned int enoent_count = 0;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t op_errno = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (!enoent_count) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- goto out;
- }
- sh->impunge_done = impunge_done;
- ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
- if (ret)
- goto out;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- loc_copy (&impunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&impunge_sh->parent_loc,
- &impunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- impunge_local->call_count = enoent_count;
- impunge_sh->entrybuf = sh->buf[sh->source];
- impunge_sh->parentbuf = sh->parentbufs[sh->source];
- for (i = 0; i < priv->child_count; i++) {
- if (!impunge_local->child_up[i]) {
- impunge_sh->child_errno[i] = ENOTCONN;
- continue;
- }
- if (sh->child_errno[i] != ENOENT) {
- impunge_sh->child_errno[i] = EEXIST;
- continue;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_entry_impunge_create (impunge_frame, this, i);
- enoent_count--;
- }
- GF_ASSERT (!enoent_count);
- return;
-out:
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
- "reason: %s", local->loc.path, strerror (-ret));
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- }
- afr_sh_missing_entries_finish (frame, this);
-}
-int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0)
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
+ loc.inode = inode_ref (inode); /* loc takes its own ref; presumably released by loc_wipe() below - confirm */
+ uuid_copy (loc.gfid, inode->gfid);
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- buf = &sh->buf[sh->source];
- postparent = &sh->parentbufs[sh->source];
-
- type = buf->ia_type;
- if (!afr_valid_ia_type (type)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
- afr_set_local_for_unhealable (local);
- afr_sh_missing_entries_finish (frame, this);
- goto out;
- }
-
- afr_sh_missing_entry_call_impunge_recreate (frame, this,
- buf, postparent,
- afr_sh_create_entry_cbk);
-out:
- return 0;
-}
+ flock.l_type = F_UNLCK; /* same range as the lock request, with the type switched to unlock */
+ flock.l_start = off;
+ flock.l_len = size;
-void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t ia_type = IA_INVAL;
- int32_t nsources = 0;
- loc_t *loc = NULL;
- int32_t subvol_status = 0;
- afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
- gf_boolean_t split_brain = _gf_false;
- int read_child = -1;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- loc = &local->loc;
-
- if (op_ret < 0) {
- if (op_errno == EIO) {
- afr_set_local_for_unhealable (local);
- }
- // EIO can happen if finding the fresh parent dir failed
- goto out;
- }
-
- //now No chance for the ia_type to conflict
- ia_type = sh->buf[sh->success_children[0]].ia_type;
- txn_type = afr_transaction_type_get (ia_type);
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children, txn_type,
- &subvol_status, _gf_false);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- if (subvol_status & SPLIT_BRAIN) {
- split_brain = _gf_true;
- switch (txn_type) {
- case AFR_DATA_TRANSACTION:
- nsources = 1;
- sh->sources[sh->success_children[0]] = 1;
- break;
- case AFR_ENTRY_TRANSACTION:
- read_child = afr_get_no_xattr_dir_read_child
- (this,
- sh->success_children,
- sh->buf);
- sh->sources[read_child] = 1;
- nsources = 1;
- break;
- default:
- op_errno = EIO;
- goto out;
- }
- } else {
- op_errno = EIO;
- goto out;
- }
- }
-
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- sh->source = sh->fresh_children[0];
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
-
- if (sh->gfid_sh_success_cbk)
- sh->gfid_sh_success_cbk (frame, this);
- sh->type = sh->buf[sh->source].ia_type;
- if (uuid_is_null (loc->inode->gfid))
- uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
- if (split_brain) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- sh_missing_entries_create (frame, this);
- }
- return;
-out:
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
-}
+ AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL); /* NOTE(review): AFR_ONLIST is defined elsewhere; presumably sends the unlock only to children flagged in locked_on - confirm */
-static int
-afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent, &sh->lookup_loc);
- call_count = afr_frame_return (frame);
-
- if (call_count)
- goto out;
- op_ret = -1;
- if (!sh->success_count) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
- "reason %s", sh->lookup_loc.path,
- strerror (op_errno));
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
- (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count,
- sh->lookup_loc.path, this->name))) {
- op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
-
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
- (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- sh->lookup_loc.path))) {
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
- op_ret = 0;
-
-done:
- sh->lookup_done (frame, this, op_ret, op_errno);
-out:
- return 0;
+ loc_wipe (&loc);
+
+ return 0; /* unlock failures are not reported to the caller */
}
+
int
-afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
+afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->post_remove_call);
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "purge entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- LOCK (&frame->lock);
- {
- afr_sh_set_error (sh, EIO);
- afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
- }
- UNLOCK (&frame->lock);
- }
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- sh->post_remove_call (frame, this);
- return 0;
-}
+ loc_t loc = {0,};
-void
-afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *parentbuf,
- afr_expunge_done_cbk_t expunge_done)
-{
- call_frame_t *expunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int32_t op_errno = 0;
- int ret = 0;
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
-
- local = frame->local;
- sh = &local->self_heal;
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- loc_copy (&expunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&expunge_sh->parent_loc,
- &expunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
- parentbuf);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- expunge_done (frame, this, child_index, -1, op_errno);
-}
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
-void
-afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
- int32_t *fresh_children,
- unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (afr_is_child_present (success_children, child_count, i) &&
- !afr_is_child_present (fresh_children, child_count, i)) {
- sh->child_errno[i] = ENOENT;
- GF_ASSERT (sh->xattr[i]);
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }