summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/ec/ec-fix-openfd.t109
-rwxr-xr-xtests/bugs/core/bug-908146.t12
-rw-r--r--tests/include.rc7
-rw-r--r--tests/volume.rc12
-rw-r--r--xlators/cluster/ec/src/ec-common.c113
-rw-r--r--xlators/cluster/ec/src/ec-common.h3
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c8
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c29
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c3
-rw-r--r--xlators/cluster/ec/src/ec-types.h59
11 files changed, 313 insertions, 43 deletions
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
new file mode 100644
index 00000000000..b62fbf429c8
--- /dev/null
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+# This test checks for open fd heal on EC:
+#  1. a file is opened while one brick is down, so that brick has no fd;
+#  2. the brick comes back and a write is issued through the same fd;
+#  3. the write is expected to open the fd on the returning brick as well;
+#  4. the file content is then read with each brick down in turn and all
+#     three checksums must match.
+
+#Create Volume
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.read-after-open yes
+TEST $CLI volume set $V0 performance.lazy-open no
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Touch a file
+TEST touch "$M0/test_file"
+
+#Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Open the file in write mode (brick 0 is down, so it cannot get the fd)
+TEST fd=`fd_available`
+TEST fd_open $fd 'rw' "$M0/test_file"
+
+#Bring up the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Test the fd count: only the bricks that were up during open hold the fd
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
+
+#Write to file
+#/dev/urandom is used instead of /dev/random: the payload only has to be
+#arbitrary, and /dev/random may block for a long time on hosts with little
+#entropy, stalling the whole test.
+dd iflag=fullblock if=/dev/urandom bs=1024 count=2 >&$fd 2>/dev/null
+
+#Test the fd count: the write must have opened the fd on brick 0 too
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+
+#Close fd
+TEST fd_close $fd
+
+#Stop the volume
+TEST $CLI volume stop $V0
+
+#Start the volume
+TEST $CLI volume start $V0
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum (read served without brick 0)
+md5sum0=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum (read served without brick 1)
+md5sum1=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick3
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum (read served without brick 2)
+md5sum2=`get_md5_sum "$M0/test_file"`
+
+#compare the md5sum: every pair of bricks must yield the same content
+EXPECT "$md5sum0" echo $md5sum1
+EXPECT "$md5sum0" echo $md5sum2
+EXPECT "$md5sum1" echo $md5sum2
+
+cleanup
diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t
index bf34992fee5..327be6e54bc 100755
--- a/tests/bugs/core/bug-908146.t
+++ b/tests/bugs/core/bug-908146.t
@@ -2,18 +2,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
-function get_fd_count {
- local vol=$1
- local host=$2
- local brick=$3
- local fname=$4
- local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
- local statedump=$(generate_brick_statedump $vol $host $brick)
- local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
- rm -f $statedump
- echo $count
-}
cleanup;
TEST glusterd
diff --git a/tests/include.rc b/tests/include.rc
index f5d6758eab1..5af4b241bf2 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -1230,3 +1230,10 @@ function STAT_INO()
echo 0
fi
}
+
+# Print the MD5 checksum (hex digest only, without the file name) of the
+# file given as first argument. Prints an empty line if md5sum produces no
+# output.
+function get_md5_sum()
+{
+        # Quote the path so names containing whitespace are passed to md5sum
+        # as a single argument, and keep the result local so the caller's
+        # namespace is not polluted.
+        local file="$1"
+        local md5_sum
+
+        md5_sum=$(md5sum "$file" | awk '{print $1}')
+        echo "$md5_sum"
+}
diff --git a/tests/volume.rc b/tests/volume.rc
index 1cee648993b..1ca17ab3456 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -796,3 +796,15 @@ function count_sh_entries()
{
ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
}
+
+# Print the fd-count recorded for a file in the statedump produced by
+# generate_brick_statedump.
+#   $1 - volume name
+#   $2 - host
+#   $3 - brick path
+#   $4 - file name relative to the brick path
+# The file's gfid xattr is read from the brick, the dump is searched for
+# "gfid=<gfid>" and the value of the last "fd-count=" line found within the
+# two lines following a match is printed. The statedump file is removed.
+function get_fd_count {
+        local vol=$1
+        local host=$2
+        local brick=$3
+        local fname=$4
+        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr "$brick/$fname"))
+        local statedump=$(generate_brick_statedump "$vol" "$host" "$brick")
+        # "$statedump" is quoted on purpose: if it were empty and unquoted,
+        # grep would get no file operand and read from stdin instead.
+        local count=$(grep -A2 "gfid=$gfid_str" "$statedump" | grep fd-count | cut -f2 -d'=' | tail -1)
+        rm -f "$statedump"
+        # Left unquoted so surrounding whitespace is trimmed as before.
+        echo $count
+}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 732d422517d..f8974e98762 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -21,6 +21,114 @@
#include "ec.h"
#include "ec-messages.h"
+/*
+ * Store the result of an open/opendir answer for one subvolume in the
+ * EC context of @fd.
+ *
+ * @fd:         fd whose context is updated; a NULL fd is ignored.
+ * @xl:         xlator used to look up the fd context.
+ * @idx:        index of the subvolume inside fd_status[].
+ * @ret_status: result of the open; a value >= 0 marks the subvolume as
+ *              EC_FD_OPENED, a negative value as EC_FD_NOT_OPENED.
+ *
+ * Nothing is recorded when the fd context cannot be obtained.
+ */
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
+                     int32_t ret_status)
+{
+        ec_fd_t        *ctx        = NULL;
+        ec_fd_status_t  new_status = EC_FD_NOT_OPENED;
+
+        if (!fd) {
+                return;
+        }
+
+        /* The new state depends only on the sign of the result. */
+        if (ret_status >= 0) {
+                new_status = EC_FD_OPENED;
+        }
+
+        LOCK (&fd->lock);
+        {
+                ctx = __ec_fd_get (fd, xl);
+                if (ctx != NULL) {
+                        ctx->fd_status[idx] = new_status;
+                }
+        }
+        UNLOCK (&fd->lock);
+}
+
+/*
+ * Select the subvolumes on which @fd still has to be opened.
+ *
+ * A subvolume is a candidate when its fd status is EC_FD_NOT_OPENED and its
+ * bit is set in ec->xl_up. When candidates are accepted they are switched to
+ * EC_FD_OPENING and their bits are returned through @need_open.
+ *
+ * @fd:        fd to inspect.
+ * @this:      EC xlator; this->private is used as the ec_t instance.
+ * @need_open: out parameter, bitmask of the subvolumes selected for open.
+ *
+ * Returns the number of selected subvolumes. Returns 0, with @need_open
+ * cleared, when there is no fd context, when no subvolume is a candidate, or
+ * when the number of candidates reaches ec->fragments. In those cases no
+ * status is modified, so the subvolumes are not left in EC_FD_OPENING
+ * without an open having been issued for them.
+ */
+static int
+ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
+{
+        int        i      = 0;
+        int        count  = 0;
+        uintptr_t  bit    = 0;
+        uintptr_t  mask   = 0;
+        ec_t      *ec     = NULL;
+        ec_fd_t   *fd_ctx = NULL;
+
+        ec = this->private;
+        *need_open = 0;
+
+        fd_ctx = ec_fd_get (fd, this);
+        if (!fd_ctx)
+                return 0;
+
+        LOCK (&fd->lock);
+        {
+                /* First pass: only collect the candidates. The shift is
+                 * done on uintptr_t so that it matches the width of the
+                 * xl_up mask instead of being an int shift. */
+                for (i = 0; i < ec->nodes; i++) {
+                        bit = (uintptr_t)1 << i;
+                        if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+                            (ec->xl_up & bit)) {
+                                mask |= bit;
+                                count++;
+                        }
+                }
+
+                /* If fd needs to open on minimum number of nodes
+                 * then ignore fixing the fd as it has been
+                 * requested from heal operation.
+                 */
+                if (count >= ec->fragments) {
+                        count = 0;
+                        mask = 0;
+                }
+
+                /* Second pass: mark only the subvolumes for which the
+                 * caller is really going to send an open. */
+                for (i = 0; i < ec->nodes; i++) {
+                        if (mask & ((uintptr_t)1 << i))
+                                fd_ctx->fd_status[i] = EC_FD_OPENING;
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        *need_open = mask;
+
+        return count;
+}
+
+/*
+ * Tell whether a missing open on @fd can be repaired.
+ *
+ * Returns _gf_true only when @fd is not NULL, has an inode, is not an
+ * anonymous fd and its inode has a non-null gfid; _gf_false otherwise.
+ */
+static gf_boolean_t
+ec_is_fd_fixable (fd_t *fd)
+{
+        gf_boolean_t fixable = _gf_false;
+
+        /* The checks are ordered so that fd and fd->inode are validated
+         * before they are dereferenced. */
+        if ((fd != NULL) && (fd->inode != NULL) &&
+            !fd_is_anonymous (fd) &&
+            !gf_uuid_is_null (fd->inode->gfid)) {
+                fixable = _gf_true;
+        }
+
+        return fixable;
+}
+
+/*
+ * Open the fd of @fop on the subvolumes where it is not opened yet.
+ *
+ * Does nothing when the fd is not fixable (see ec_is_fd_fixable) or when
+ * ec_fd_ctx_need_open() selects no subvolume. Otherwise a loc is built from
+ * the inode and gfid of the fd and an opendir (for directories) or an open
+ * (for anything else) is sent to the selected subvolumes with
+ * EC_MINIMUM_ONE.
+ *
+ * If the loc cannot be resolved, the selected subvolumes are put back to
+ * EC_FD_NOT_OPENED: no open is sent for them, so no open callback would
+ * ever move them out of EC_FD_OPENING.
+ */
+static void
+ec_fix_open (ec_fop_data_t *fop)
+{
+        int        call_count = 0;
+        uintptr_t  need_open  = 0;
+        int        ret        = 0;
+        int        i          = 0;
+        ec_t      *ec         = NULL;
+        loc_t      loc        = {0, };
+
+        if (!ec_is_fd_fixable (fop->fd))
+                goto out;
+
+        /* Evaluate how many remote fd's to be opened */
+        call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open);
+        if (!call_count)
+                goto out;
+
+        loc.inode = inode_ref (fop->fd->inode);
+        gf_uuid_copy (loc.gfid, fop->fd->inode->gfid);
+        ret = loc_path (&loc, NULL);
+        if (ret < 0) {
+                /* Undo the selection made by ec_fd_ctx_need_open(): a
+                 * negative status resets the subvolume to
+                 * EC_FD_NOT_OPENED so a later fop can retry. */
+                ec = fop->xl->private;
+                for (i = 0; i < ec->nodes; i++) {
+                        if (need_open & ((uintptr_t)1 << i))
+                                ec_update_fd_status (fop->fd, fop->xl, i, -1);
+                }
+                goto out;
+        }
+
+        if (IA_IFDIR == fop->fd->inode->ia_type) {
+                /* NOTE(review): this branch passes fop->loc[0] while the
+                 * open branch passes the loc built above - confirm that
+                 * fop->loc[0] is populated for fd based fops. */
+                ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+                           NULL, NULL, &fop->loc[0], fop->fd, NULL);
+        } else{
+                ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+                        NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL);
+        }
+
+out:
+        /* Releases the inode reference and path held by loc, if any. */
+        loc_wipe (&loc);
+}
+
uint32_t
ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
{
@@ -1532,6 +1640,11 @@ void ec_lock_acquired(ec_lock_link_t *link)
ec_lock_apply(link);
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ ec_fix_open(fop);
+ }
+
ec_lock_resume_shared(&list);
}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index a03a590402a..50d65ed27fd 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -126,4 +126,7 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
+/*
+ * Record in the EC context of @fd whether the fd is opened on the subvolume
+ * at @child_index (named idx in the definition): a @ret_status >= 0 stores
+ * EC_FD_OPENED, a negative one stores EC_FD_NOT_OPENED. A NULL @fd is
+ * ignored.
+ */
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl,
+ int child_index, int32_t ret_status);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 4fe82e3c0b6..a371d99f2ff 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -19,7 +19,11 @@
#include "ec-method.h"
#include "ec-fops.h"
-/* FOP: opendir */
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
@@ -88,6 +92,8 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
}
out:
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 150dc66f21b..7779d4849f3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -71,6 +71,7 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
out:
if (cbk)
ec_combine (cbk, ec_combine_write);
+
if (fop)
ec_complete (fop);
return 0;
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 64b010fc480..cef71b5a3ac 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -751,27 +751,32 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
{
+ int i = 0;
ec_fd_t * ctx = NULL;
uint64_t value = 0;
+ ec_t *ec = xl->private;
- if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
- {
- ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
- if (ctx != NULL)
- {
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
- value = (uint64_t)(uintptr_t)ctx;
- if (__fd_ctx_set(fd, xl, value) != 0)
- {
- GF_FREE(ctx);
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous (fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0) {
+ GF_FREE (ctx);
return NULL;
}
}
- }
- else
- {
+ } else {
ctx = (ec_fd_t *)(uintptr_t)value;
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index d925e82ba36..270eef42981 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -736,6 +736,9 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
+
}
out:
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 3e93a1a32cc..4051f4e3a2b 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -115,6 +115,13 @@ enum _ec_read_policy {
EC_READ_POLICY_MAX
};
+/* Enumerations to indicate FD status (one entry per subvolume in ec_fd_t). */
+typedef enum {
+ EC_FD_NOT_OPENED, /* Initial state of a non-anonymous fd; also set when an open fails */
+ EC_FD_OPENED, /* Initial state of an anonymous fd; also set when an open succeeds */
+ EC_FD_OPENING /* Subvolume has been selected for an open by ec_fd_ctx_need_open() */
+} ec_fd_status_t;
+
struct _ec_config {
uint32_t version;
uint8_t algorithm;
@@ -128,6 +135,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ ec_fd_status_t fd_status[0];
};
struct _ec_inode {
@@ -252,17 +260,21 @@ struct _ec_lock_link {
uint64_t size;
};
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
struct _ec_fop_data {
- int32_t id;
+ int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum;
+ int32_t minimum; /* Minimum number of successful
+ operations required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
int32_t error;
ec_fop_data_t *parent;
- xlator_t *xl;
+ xlator_t *xl; /* points to EC xlator */
call_frame_t *req_frame; /* frame of the calling xlator */
call_frame_t *frame; /* frame used by this fop */
struct list_head cbk_list; /* sorted list of groups of answers */
@@ -288,10 +300,10 @@ struct _ec_fop_data {
uid_t uid;
gid_t gid;
- ec_wind_f wind;
- ec_handler_f handler;
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
ec_resume_f resume;
- ec_cbk_t cbks;
+ ec_cbk_t cbks; /* Callback function for this FOP */
void *data;
ec_heal_t *heal;
struct list_head healer;
@@ -299,7 +311,8 @@ struct _ec_fop_data {
uint64_t user_size;
uint32_t head;
- int32_t use_fd;
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
dict_t *xdata;
dict_t *dict;
@@ -313,10 +326,12 @@ struct _ec_fop_data {
gf_xattrop_flags_t xattrop_flags;
dev_t dev;
inode_t *inode;
- fd_t *fd;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
struct iatt iatt;
char *str[2];
- loc_t loc[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
struct gf_flock flock;
struct iovec *vector;
struct iobref *buffers;
@@ -544,18 +559,24 @@ struct _ec {
xlator_t *xl;
int32_t healers;
int32_t heal_waiters;
- int32_t nodes;
+ int32_t nodes; /* Total number of bricks(n) */
int32_t bits_for_nodes;
- int32_t fragments;
- int32_t redundancy;
- uint32_t fragment_size;
- uint32_t stripe_size;
- int32_t up;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
uint32_t idx;
- uint32_t xl_up_count;
- uintptr_t xl_up;
- uint32_t xl_notify_count;
- uintptr_t xl_notify;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
uintptr_t node_mask;
xlator_t **xl_list;
gf_lock_t lock;