summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/afr/resolve.t8
-rw-r--r--tests/volume.rc7
-rw-r--r--xlators/cluster/afr/src/afr-common.c56
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c61
4 files changed, 105 insertions, 27 deletions
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t
index ce7ec841418..2d400563c2e 100644
--- a/tests/basic/afr/resolve.t
+++ b/tests/basic/afr/resolve.t
@@ -24,7 +24,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
#Test that the lookup returns ENOENT instead of ESTALE
#If lookup returns ESTALE this command will fail with ESTALE
TEST touch f
@@ -44,8 +44,8 @@ echo jkl > $M1/b
#gfid-mismatch happened. This should result in EIO
TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-# The kernel knows nothing about the tricks done to the volume, and the file
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+# The kernel knows nothing about the tricks done to the volume, and the file
# may still be in page cache. Wait for timeout.
-EXPECT_WITHIN 10 "" cat $M0/b
+EXPECT_WITHIN 10 "^$" cat $M0/b
cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 36f1350b9bc..7e8810852f5 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -103,6 +103,13 @@ function _afr_child_up_status {
echo "$up"
}
+function afr_child_up_status_meta {
+ local mnt=$1
+ local repl=$2
+ local child=$3
+ grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
+}
+
function afr_child_up_status {
local vol=$1
#brick_id is (brick-num in volume info - 1)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 533a7b5d5a1..04e4ca315f2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1219,6 +1219,50 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2)
return _gf_true;
}
+static int
+afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
+ struct afr_reply *replies, unsigned char *readable)
+{
+ int i = 0;
+ int par_read_subvol = -1;
+ int par_read_subvol_iter = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (parent)
+ par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret < 0)
+ continue;
+
+ if (par_read_subvol_iter == -1) {
+ par_read_subvol_iter = i;
+ continue;
+ }
+
+ if ((i != par_read_subvol) && readable[i])
+ par_read_subvol_iter = i;
+
+ if (i == par_read_subvol)
+ par_read_subvol_iter = i;
+ }
+ /* At the end of the for-loop, the only reason why @par_read_subvol_iter
+ * could be -1 is when this LOOKUP has failed on all sub-volumes.
+ * So it is okay to send an arbitrary subvolume (0 in this case)
+ * as parent read subvol.
+ */
+ if (par_read_subvol_iter == -1)
+ par_read_subvol_iter = 0;
+
+ return par_read_subvol_iter;
+
+}
+
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@@ -1227,23 +1271,25 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
int i = -1;
int op_errno = 0;
int read_subvol = 0;
+ int par_read_subvol = 0;
unsigned char *readable = NULL;
int event = 0;
struct afr_reply *replies = NULL;
uuid_t read_gfid = {0, };
gf_boolean_t locked_entry = _gf_false;
gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
priv = this->private;
local = frame->local;
replies = local->replies;
+ parent = local->loc.parent;
locked_entry = afr_is_entry_possibly_under_txn (local, this);
readable = alloca0 (priv->child_count);
- afr_inode_read_subvol_get (local->loc.parent, this, readable,
- NULL, &event);
+ afr_inode_read_subvol_get (parent, this, readable, NULL, &event);
/* First, check if we have a gfid-change from somewhere,
If so, propagate that so that a fresh lookup can be
@@ -1269,7 +1315,6 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
"underway" in creation */
local->op_ret = -1;
local->op_errno = ENOENT;
- read_subvol = i;
goto unwind;
}
@@ -1347,10 +1392,13 @@ unwind:
if (read_subvol == -1)
read_subvol = 0;
+ par_read_subvol = afr_get_parent_read_subvol (this, parent, replies,
+ readable);
+
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->replies[read_subvol].poststat,
local->replies[read_subvol].xdata,
- &local->replies[read_subvol].postparent);
+ &local->replies[par_read_subvol].postparent);
}
/*
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index af6a1787593..28bf89f2842 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -123,22 +123,50 @@ out:
return 0;
}
+static int
+afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
+{
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+
+ afr_inode_read_subvol_get (inode, this, data_readable,
+ metadata_readable, &gen);
+
+ if (gen != priv->event_generation ||
+ !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+ entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
+
+ return 0;
+
+}
static void
afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
gf_dirent_t *entries, fd_t *fd)
{
- afr_private_t *priv = NULL;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- unsigned char *data_readable = NULL;
- unsigned char *metadata_readable = NULL;
- int gen = 0;
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
- priv = THIS->private;
-
- data_readable = alloca0 (priv->child_count);
- metadata_readable = alloca0 (priv->child_count);
+ this = THIS;
list_for_each_entry_safe (entry, tmp, &subvol_entries->list, list) {
if (__is_root_gfid (fd->inode->gfid) &&
@@ -150,17 +178,12 @@ afr_readdir_transform_entries (gf_dirent_t *subvol_entries, int subvol,
list_add_tail (&entry->list, &entries->list);
if (entry->inode) {
- gen = 0;
- afr_inode_read_subvol_get (entry->inode, THIS,
- data_readable,
- metadata_readable, &gen);
-
- if (gen != priv->event_generation ||
- !data_readable[subvol] ||
- !metadata_readable[subvol]) {
-
+ ret = afr_validate_read_subvol (entry->inode, this,
+ subvol);
+ if (ret == -1) {
inode_unref (entry->inode);
entry->inode = NULL;
+ continue;
}
}
}