summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/basic/ec/ec-fix-openfd.t109
-rwxr-xr-xtests/bugs/core/bug-908146.t12
-rw-r--r--tests/volume.rc12
-rw-r--r--xlators/cluster/ec/src/ec-common.c113
-rw-r--r--xlators/cluster/ec/src/ec-common.h3
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c8
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c29
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c3
-rw-r--r--xlators/cluster/ec/src/ec-types.h177
10 files changed, 365 insertions, 102 deletions
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
new file mode 100644
index 00000000000..b62fbf429c8
--- /dev/null
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+# This test checks for open fd heal on EC
+
+#Create Volume
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.read-after-open yes
+TEST $CLI volume set $V0 performance.lazy-open no
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Touch a file
+TEST touch "$M0/test_file"
+
+#Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Open the file in write mode
+TEST fd=`fd_available`
+TEST fd_open $fd 'rw' "$M0/test_file"
+
+#Bring up the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Test the fd count
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
+
+#Write to file
+dd iflag=fullblock if=/dev/random bs=1024 count=2 >&$fd 2>/dev/null
+
+#Test the fd count
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+
+#Close fd
+TEST fd_close $fd
+
+#Stop the volume
+TEST $CLI volume stop $V0
+
+#Start the volume
+TEST $CLI volume start $V0
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum0=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum1=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick3
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum2=`get_md5_sum "$M0/test_file"`
+
+#compare the md5sum
+EXPECT "$md5sum0" echo $md5sum1
+EXPECT "$md5sum0" echo $md5sum2
+EXPECT "$md5sum1" echo $md5sum2
+
+cleanup
diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t
index bf34992fee5..327be6e54bc 100755
--- a/tests/bugs/core/bug-908146.t
+++ b/tests/bugs/core/bug-908146.t
@@ -2,18 +2,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
-function get_fd_count {
- local vol=$1
- local host=$2
- local brick=$3
- local fname=$4
- local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
- local statedump=$(generate_brick_statedump $vol $host $brick)
- local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
- rm -f $statedump
- echo $count
-}
cleanup;
TEST glusterd
diff --git a/tests/volume.rc b/tests/volume.rc
index 1cee648993b..1ca17ab3456 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -796,3 +796,15 @@ function count_sh_entries()
{
ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
}
+
+function get_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $count
+}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index b7088e54724..cb627a92c9c 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -26,6 +26,114 @@
EC_FLAG_WAITING_DATA_DIRTY |\
EC_FLAG_WAITING_METADATA_DIRTY)
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
+ int32_t ret_status)
+{
+ ec_fd_t *fd_ctx;
+
+ if (fd == NULL)
+ return;
+
+ LOCK (&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&fd->lock);
+}
+
+static int
+ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
+{
+ int i = 0;
+ int count = 0;
+ ec_t *ec = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ ec = this->private;
+ *need_open = 0;
+
+ fd_ctx = ec_fd_get (fd, this);
+ if (!fd_ctx)
+ return count;
+
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < ec->nodes; i++) {
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+ (ec->xl_up & (1<<i))) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+ *need_open |= (1<<i);
+ count++;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ /* If fd needs to open on minimum number of nodes
+ * then ignore fixing the fd as it has been
+ * requested from heal operation.
+ */
+ if (count >= ec->fragments)
+ count = 0;
+
+ return count;
+}
+
+static gf_boolean_t
+ec_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (gf_uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+static void
+ec_fix_open (ec_fop_data_t *fop)
+{
+ int call_count = 0;
+ uintptr_t need_open = 0;
+ int ret = 0;
+ loc_t loc = {0, };
+
+ if (!ec_is_fd_fixable (fop->fd))
+ goto out;
+
+ /* Evaluate how many remote fd's to be opened */
+ call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open);
+ if (!call_count)
+ goto out;
+
+ loc.inode = inode_ref (fop->fd->inode);
+ gf_uuid_copy (loc.gfid, fop->fd->inode->gfid);
+ ret = loc_path (&loc, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+ ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+ NULL, NULL, &fop->loc[0], fop->fd, NULL);
+ } else{
+ ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+ NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL);
+ }
+
+out:
+ loc_wipe (&loc);
+}
+
off_t
ec_range_end_get (off_t fl_start, size_t fl_size)
{
@@ -1677,6 +1785,11 @@ void ec_lock_acquired(ec_lock_link_t *link)
ec_lock_apply(link);
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ ec_fix_open(fop);
+ }
+
ec_lock_resume_shared(&list);
}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 35c6a8107c2..c3e291585ef 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -140,4 +140,7 @@ int32_t ec_lock_unlocked(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
dict_t *xdata);
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl,
+ int child_index, int32_t ret_status);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 48afe54460f..b44bb4239b1 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -19,7 +19,11 @@
#include "ec-method.h"
#include "ec-fops.h"
-/* FOP: opendir */
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
@@ -88,6 +92,8 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
}
out:
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 150dc66f21b..7779d4849f3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -71,6 +71,7 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
out:
if (cbk)
ec_combine (cbk, ec_combine_write);
+
if (fop)
ec_complete (fop);
return 0;
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index fe610748f0f..83f96ba6cb2 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -764,27 +764,32 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
{
+ int i = 0;
ec_fd_t * ctx = NULL;
uint64_t value = 0;
+ ec_t *ec = xl->private;
- if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
- {
- ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
- if (ctx != NULL)
- {
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
- value = (uint64_t)(uintptr_t)ctx;
- if (__fd_ctx_set(fd, xl, value) != 0)
- {
- GF_FREE(ctx);
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous (fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0) {
+ GF_FREE (ctx);
return NULL;
}
}
- }
- else
- {
+ } else {
ctx = (ec_fd_t *)(uintptr_t)value;
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 03690ab8e96..d58ed9e5795 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -749,6 +749,9 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
+
}
out:
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 7b2b7b8247d..23dc434bc42 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -145,6 +145,13 @@ enum _ec_stripe_part {
EC_STRIPE_TAIL
};
+/* Enumartions to indicate FD status. */
+typedef enum {
+ EC_FD_NOT_OPENED,
+ EC_FD_OPENED,
+ EC_FD_OPENING
+} ec_fd_status_t;
+
struct _ec_config {
uint32_t version;
uint8_t algorithm;
@@ -158,6 +165,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ ec_fd_status_t fd_status[0];
};
struct _ec_stripe {
@@ -309,75 +317,82 @@ struct _ec_fragment_range {
the bricks (offset on brick) */
};
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
struct _ec_fop_data {
- int32_t id;
- int32_t refs;
- int32_t state;
- int32_t minimum;
- int32_t expected;
- int32_t winds;
- int32_t jobs;
- int32_t error;
- ec_fop_data_t *parent;
- xlator_t *xl;
- call_frame_t *req_frame; /* frame of the calling xlator */
- call_frame_t *frame; /* frame used by this fop */
- struct list_head cbk_list; /* sorted list of groups of answers */
- struct list_head answer_list; /* list of answers */
- struct list_head pending_list; /* member of ec_t.pending_fops */
- ec_cbk_data_t *answer; /* accepted answer */
- int32_t lock_count;
- int32_t locked;
- ec_lock_link_t locks[2];
- int32_t first_lock;
- gf_lock_t lock;
-
- uint32_t flags;
- uint32_t first;
- uintptr_t mask;
- uintptr_t healing; /*Dispatch is done but call is successful
- only if fop->minimum number of subvolumes
- succeed which are not healing*/
- uintptr_t remaining;
- uintptr_t received; /* Mask of responses */
- uintptr_t good;
-
- uid_t uid;
- gid_t gid;
-
- ec_wind_f wind;
- ec_handler_f handler;
- ec_resume_f resume;
- ec_cbk_t cbks;
- void *data;
- ec_heal_t *heal;
- struct list_head healer;
-
- uint64_t user_size;
- uint32_t head;
-
- int32_t use_fd;
-
- dict_t *xdata;
- dict_t *dict;
- int32_t int32;
- uint32_t uint32;
- uint64_t size;
- off_t offset;
- mode_t mode[2];
- entrylk_cmd entrylk_cmd;
- entrylk_type entrylk_type;
- gf_xattrop_flags_t xattrop_flags;
- dev_t dev;
- inode_t *inode;
- fd_t *fd;
- struct iatt iatt;
- char *str[2];
- loc_t loc[2];
- struct gf_flock flock;
- struct iovec *vector;
- struct iobref *buffers;
- gf_seek_what_t seek;
+ int32_t id; /* ID of the file operation */
+ int32_t refs;
+ int32_t state;
+ int32_t minimum; /* Mininum number of successful
+ operation required to conclude a
+ fop as successful */
+ int32_t expected;
+ int32_t winds;
+ int32_t jobs;
+ int32_t error;
+ ec_fop_data_t *parent;
+ xlator_t *xl; /* points to EC xlator */
+ call_frame_t *req_frame; /* frame of the calling xlator */
+ call_frame_t *frame; /* frame used by this fop */
+ struct list_head cbk_list; /* sorted list of groups of answers */
+ struct list_head answer_list; /* list of answers */
+ struct list_head pending_list; /* member of ec_t.pending_fops */
+ ec_cbk_data_t *answer; /* accepted answer */
+ int32_t lock_count;
+ int32_t locked;
+ ec_lock_link_t locks[2];
+ int32_t first_lock;
+ gf_lock_t lock;
+
+ uint32_t flags;
+ uint32_t first;
+ uintptr_t mask;
+ uintptr_t healing; /*Dispatch is done but call is successful only
+ if fop->minimum number of subvolumes succeed
+ which are not healing*/
+ uintptr_t remaining;
+ uintptr_t received; /* Mask of responses */
+ uintptr_t good;
+
+ uid_t uid;
+ gid_t gid;
+
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
+ ec_resume_f resume;
+ ec_cbk_t cbks; /* Callback function for this FOP */
+ void *data;
+ ec_heal_t *heal;
+ struct list_head healer;
+
+ uint64_t user_size;
+ uint32_t head;
+
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
+
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32;
+ uint32_t uint32;
+ uint64_t size;
+ off_t offset;
+ mode_t mode[2];
+ entrylk_cmd entrylk_cmd;
+ entrylk_type entrylk_type;
+ gf_xattrop_flags_t xattrop_flags;
+ dev_t dev;
+ inode_t *inode;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
+ struct iatt iatt;
+ char *str[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ gf_seek_what_t seek;
ec_fragment_range_t frag_range; /* This will hold the range of stripes
affected by the fop. */
};
@@ -623,18 +638,24 @@ struct _ec {
xlator_t *xl;
int32_t healers;
int32_t heal_waiters;
- int32_t nodes;
+ int32_t nodes; /* Total number of bricks(n) */
int32_t bits_for_nodes;
- int32_t fragments;
- int32_t redundancy;
- uint32_t fragment_size;
- uint32_t stripe_size;
- int32_t up;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
uint32_t idx;
- uint32_t xl_up_count;
- uintptr_t xl_up;
- uint32_t xl_notify_count;
- uintptr_t xl_notify;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
uintptr_t node_mask;
xlator_t **xl_list;
gf_lock_t lock;