summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorSunil Kumar Acharya <sheggodu@redhat.com>2017-03-23 12:50:41 +0530
committerjiffin tony Thottan <jthottan@redhat.com>2018-02-02 06:50:35 +0000
commitb5a8e10a2aa80be463e1d20f4099d24294bc5e8a (patch)
treed384c1a1c9693c0a95b2deba19f27a5af5892d0b /xlators
parentc949870a7fb2179f8e75ab070a5eae4fc11f78b3 (diff)
cluster/ec: OpenFD heal implementation for EC
Existing EC code doesn't try to heal open FDs, which leads to unnecessary healing of the data later. This fix implements healing of open FDs before carrying out file operations on them, by attempting to open the FDs on the required up nodes. Backport of: >BUG: 1431955 BUG: 1536334 Change-Id: Ib696f59c41ffd8d5678a484b23a00bb02764ed15 Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/ec/src/ec-common.c113
-rw-r--r--xlators/cluster/ec/src/ec-common.h3
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c8
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c29
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c3
-rw-r--r--xlators/cluster/ec/src/ec-types.h59
7 files changed, 184 insertions, 32 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 732d422517d..f8974e98762 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -21,6 +21,114 @@
#include "ec.h"
#include "ec-messages.h"
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
+ int32_t ret_status)
+{
+ ec_fd_t *fd_ctx;
+/*
+ * Record in the EC context of 'fd' whether the fd is open on child 'idx'.
+ *
+ * fd:         fd whose per-child status is updated; NULL is a no-op.
+ * xl:         EC xlator that owns the fd context.
+ * idx:        index of the child (brick) the result refers to; it is used
+ *             directly as an index into fd_status[] without a range check.
+ * ret_status: result of the open/opendir on that child (the callbacks in
+ *             this patch pass op_ret): >= 0 marks the child EC_FD_OPENED,
+ *             a negative value marks it EC_FD_NOT_OPENED.
+ *
+ * The lookup and the update are both done under fd->lock. If no context
+ * can be obtained the call silently does nothing.
+ */
+ if (fd == NULL)
+ return;
+
+ LOCK (&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl); /* locked variant: fd->lock is already held */
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&fd->lock);
+}
+
+/*
+ * Select the children on which an fd still has to be opened.
+ *
+ * Under fd->lock, every child that is up (its bit is set in ec->xl_up) and
+ * whose per-child state is EC_FD_NOT_OPENED is switched to EC_FD_OPENING
+ * and its bit is set in *need_open.
+ *
+ * fd:        fd whose EC context is inspected.
+ * this:      EC xlator; this->private is the ec_t.
+ * need_open: out parameter, bitmask of the children picked by this call.
+ *            It is always written (0 when the fd has no EC context).
+ *
+ * Returns the number of children picked. Returns 0 when there is no
+ * context, when nothing was picked, or when the number picked reaches
+ * ec->fragments; in that last case *need_open and the EC_FD_OPENING marks
+ * are left as set by the loop.
+ */
+static int
+ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
+{
+        int        i      = 0;
+        int        count  = 0;
+        uintptr_t  bit    = 0;
+        ec_t      *ec     = NULL;
+        ec_fd_t   *fd_ctx = NULL;
+
+        ec = this->private;
+        *need_open = 0;
+
+        fd_ctx = ec_fd_get (fd, this);
+        if (!fd_ctx)
+                return count;
+
+        LOCK (&fd->lock);
+        {
+                for (i = 0; i < ec->nodes; i++) {
+                        /* Build the bit in the mask's own type: shifting
+                         * a plain int is undefined once i reaches 31 and
+                         * could sign-extend into the upper half of the
+                         * uintptr_t masks. */
+                        bit = (uintptr_t)1 << i;
+                        if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+                            (ec->xl_up & bit)) {
+                                fd_ctx->fd_status[i] = EC_FD_OPENING;
+                                *need_open |= bit;
+                                count++;
+                        }
+                }
+        }
+        UNLOCK (&fd->lock);
+
+        /* If fd needs to open on minimum number of nodes
+         * then ignore fixing the fd as it has been
+         * requested from heal operation.
+         */
+        if (count >= ec->fragments)
+                count = 0;
+
+        return count;
+}
+/*
+ * Tell whether 'fd' is a candidate for being re-opened on more children.
+ *
+ * Returns _gf_false when fd is NULL, has no inode, is an anonymous fd, or
+ * its inode has a null gfid; returns _gf_true otherwise.
+ */
+static gf_boolean_t
+ec_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd)) /* anonymous fds are created as EC_FD_OPENED on every child */
+ return _gf_false;
+ else if (gf_uuid_is_null (fd->inode->gfid)) /* the gfid is needed to build the loc for the open */
+ return _gf_false;
+
+ return _gf_true;
+}
+
+/*
+ * Try to open fop->fd on the up children where it is not open yet.
+ *
+ * Nothing is done when the fd is not fixable (see ec_is_fd_fixable) or
+ * when ec_fd_ctx_need_open() picks no child. Otherwise a loc is built from
+ * the fd's inode and gfid, and an opendir (directories) or an open (any
+ * other type) is issued on the picked children with EC_MINIMUM_ONE.
+ *
+ * ec_fd_ctx_need_open() has already moved the picked children to
+ * EC_FD_OPENING. If the loc cannot be built no open is issued, so those
+ * children are put back to EC_FD_NOT_OPENED here; otherwise they would
+ * stay in EC_FD_OPENING and never be picked again.
+ *
+ * NOTE(review): the opendir branch passes &fop->loc[0] while the open
+ * branch passes the locally built loc - confirm fop->loc[0] is populated
+ * for fd based fops.
+ * NOTE(review): fop->fd->flags is forwarded unchanged; if it can carry
+ * O_TRUNC/O_CREAT/O_EXCL the re-open may have side effects - confirm.
+ */
+static void
+ec_fix_open (ec_fop_data_t *fop)
+{
+        int        call_count = 0;
+        uintptr_t  need_open  = 0;
+        uintptr_t  pending    = 0;
+        int        idx        = 0;
+        int        ret        = 0;
+        loc_t      loc        = {0, };
+
+        if (!ec_is_fd_fixable (fop->fd))
+                goto out;
+
+        /* Evaluate how many remote fd's to be opened */
+        call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open);
+        if (!call_count)
+                goto out;
+
+        loc.inode = inode_ref (fop->fd->inode);
+        gf_uuid_copy (loc.gfid, fop->fd->inode->gfid);
+        ret = loc_path (&loc, NULL);
+        if (ret < 0) {
+                /* No open is wound, so no callback will reset the state:
+                 * undo the EC_FD_OPENING marks to allow a later retry. */
+                for (idx = 0, pending = need_open; pending != 0;
+                     idx++, pending >>= 1) {
+                        if (pending & 1)
+                                ec_update_fd_status (fop->fd, fop->xl,
+                                                     idx, -1);
+                }
+                goto out;
+        }
+
+        if (IA_IFDIR == fop->fd->inode->ia_type) {
+                ec_opendir (fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+                            NULL, NULL, &fop->loc[0], fop->fd, NULL);
+        } else {
+                ec_open (fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+                         NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL);
+        }
+
+out:
+        /* Releases the inode reference taken above; loc is zeroed when
+         * this is reached through an early exit. */
+        loc_wipe (&loc);
+}
+
uint32_t
ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
{
@@ -1532,6 +1640,11 @@ void ec_lock_acquired(ec_lock_link_t *link)
ec_lock_apply(link);
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ ec_fix_open(fop);
+ }
+
ec_lock_resume_shared(&list);
}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index a03a590402a..50d65ed27fd 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -126,4 +126,7 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl,
+ int child_index, int32_t ret_status);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 4fe82e3c0b6..a371d99f2ff 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -19,7 +19,11 @@
#include "ec-method.h"
#include "ec-fops.h"
-/* FOP: opendir */
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
@@ -88,6 +92,8 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
}
out:
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 150dc66f21b..7779d4849f3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -71,6 +71,7 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
out:
if (cbk)
ec_combine (cbk, ec_combine_write);
+
if (fop)
ec_complete (fop);
return 0;
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 64b010fc480..cef71b5a3ac 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -751,27 +751,32 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
{
+ int i = 0;
ec_fd_t * ctx = NULL;
uint64_t value = 0;
+ ec_t *ec = xl->private;
- if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
- {
- ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
- if (ctx != NULL)
- {
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
- value = (uint64_t)(uintptr_t)ctx;
- if (__fd_ctx_set(fd, xl, value) != 0)
- {
- GF_FREE(ctx);
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous (fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0) {
+ GF_FREE (ctx);
return NULL;
}
}
- }
- else
- {
+ } else {
ctx = (ec_fd_t *)(uintptr_t)value;
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index d925e82ba36..270eef42981 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -736,6 +736,9 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
+
}
out:
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 3e93a1a32cc..4051f4e3a2b 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -115,6 +115,13 @@ enum _ec_read_policy {
EC_READ_POLICY_MAX
};
+/* Enumerations to indicate FD status.
+ *
+ * One value is kept per child in ec_fd_t.fd_status[].
+ */
+typedef enum {
+ EC_FD_NOT_OPENED, /* initial state of a non-anonymous fd; also set when an open/opendir reply reports failure */
+ EC_FD_OPENED, /* set when an open/opendir reply reports success; initial state of an anonymous fd */
+ EC_FD_OPENING /* child was picked by ec_fd_ctx_need_open() for a re-open */
+} ec_fd_status_t;
+
struct _ec_config {
uint32_t version;
uint8_t algorithm;
@@ -128,6 +135,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ ec_fd_status_t fd_status[0];
};
struct _ec_inode {
@@ -252,17 +260,21 @@ struct _ec_lock_link {
uint64_t size;
};
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
struct _ec_fop_data {
- int32_t id;
+ int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum;
+ int32_t minimum; /* Minimum number of successful
+ operations required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
int32_t error;
ec_fop_data_t *parent;
- xlator_t *xl;
+ xlator_t *xl; /* points to EC xlator */
call_frame_t *req_frame; /* frame of the calling xlator */
call_frame_t *frame; /* frame used by this fop */
struct list_head cbk_list; /* sorted list of groups of answers */
@@ -288,10 +300,10 @@ struct _ec_fop_data {
uid_t uid;
gid_t gid;
- ec_wind_f wind;
- ec_handler_f handler;
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
ec_resume_f resume;
- ec_cbk_t cbks;
+ ec_cbk_t cbks; /* Callback function for this FOP */
void *data;
ec_heal_t *heal;
struct list_head healer;
@@ -299,7 +311,8 @@ struct _ec_fop_data {
uint64_t user_size;
uint32_t head;
- int32_t use_fd;
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
dict_t *xdata;
dict_t *dict;
@@ -313,10 +326,12 @@ struct _ec_fop_data {
gf_xattrop_flags_t xattrop_flags;
dev_t dev;
inode_t *inode;
- fd_t *fd;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
struct iatt iatt;
char *str[2];
- loc_t loc[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
struct gf_flock flock;
struct iovec *vector;
struct iobref *buffers;
@@ -544,18 +559,24 @@ struct _ec {
xlator_t *xl;
int32_t healers;
int32_t heal_waiters;
- int32_t nodes;
+ int32_t nodes; /* Total number of bricks(n) */
int32_t bits_for_nodes;
- int32_t fragments;
- int32_t redundancy;
- uint32_t fragment_size;
- uint32_t stripe_size;
- int32_t up;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
uint32_t idx;
- uint32_t xl_up_count;
- uintptr_t xl_up;
- uint32_t xl_notify_count;
- uintptr_t xl_notify;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
uintptr_t node_mask;
xlator_t **xl_list;
gf_lock_t lock;