summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xtests/bugs/bug-892730.t76
-rw-r--r--xlators/cluster/afr/src/afr-common.c13
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3
-rw-r--r--xlators/cluster/afr/src/afr.h3
4 files changed, 88 insertions, 7 deletions
diff --git a/tests/bugs/bug-892730.t b/tests/bugs/bug-892730.t
new file mode 100755
index 00000000000..0a677069eb3
--- /dev/null
+++ b/tests/bugs/bug-892730.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Bug 892730 - Verify that afr handles EIO errors from the brick properly.
+#
+# The associated bug describes a problem where EIO errors returned from the
+# local filesystem of a brick that is part of a replica volume are exposed to
+# the user. This test simulates such failures and verifies that the volume
+# operates as expected.
+#
+########
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# The graph is a two brick replica with error-gen enabled on the second brick
+# and configured to return EIO lookup errors 100% of the time. This simulates
+# a brick with a crashed or shut down local filesystem. Note that the order in
+# which errors occur is a factor in reproducing the original bug (error-gen
+# must be enabled in the second brick for this test to be effective).
+
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-posix-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-error-1
+ type debug/error-gen
+ option failure 100
+ option enable lookup
+ option error-no EIO
+ subvolumes test-posix-1
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-error-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# We should be able to create and remove a file without interference from the
+# "broken" brick.
+
+TEST touch $M0/file
+TEST rm $M0/file
+
+TEST umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ab1d9018c47..97303d1065b 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1639,7 +1639,7 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
local->op_ret = -1;
local->op_errno = afr_most_important_error(local->op_errno,
- op_errno);
+ op_errno, _gf_true);
goto out;
} else {
@@ -1993,11 +1993,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
* The hierarchy is ESTALE > EIO > ENOENT > others
*/
int32_t
-afr_most_important_error(int32_t old_errno, int32_t new_errno)
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
{
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
- if (old_errno == EIO || new_errno == EIO)
+ if (eio && (old_errno == EIO || new_errno == EIO))
return EIO;
if (old_errno == ENOENT || new_errno == ENOENT)
return ENOENT;
@@ -2022,7 +2023,8 @@ afr_resultant_errno_get (int32_t *children,
child = i;
}
op_errno = afr_most_important_error(op_errno,
- child_errno[child]);
+ child_errno[child],
+ _gf_false);
}
return op_errno;
}
@@ -2034,7 +2036,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
if (op_errno == ENOENT)
local->enoent_count++;
- local->op_errno = afr_most_important_error(local->op_errno, op_errno);
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
if (local->op_errno == ESTALE) {
local->op_ret = -1;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index e7026081bda..f59a02557c1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -112,7 +112,8 @@ void
afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
{
sh->op_ret = -1;
- sh->op_errno = afr_most_important_error(sh->op_errno, op_errno);
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
}
void
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 93bd92f25e7..85b4b6831b8 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -954,7 +954,8 @@ afr_children_rm_child (int32_t *children, int32_t child,
void
afr_reset_children (int32_t *children, int32_t child_count);
int32_t
-afr_most_important_error(int32_t old_errno, int32_t new_errno);
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio);
int
afr_errno_count (int32_t *children, int *child_errno,
unsigned int child_count, int32_t op_errno);