summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author: Anand Avati <avati@redhat.com>, 2013-08-07 04:16:52 -0700
committer: Anand Avati <avati@redhat.com>, 2013-08-14 09:30:43 -0700
commit: 1d1daa234eac97554103da16a7d6090bc25e5294 (patch)
tree: e2e32bcb385ac38f3fa3a8aa5d73ca162c41f18f /xlators
parent: 1e49b3ac9b1019c742236be8db0ca8ec00750ae7 (diff)
cluster/afr: Add largest file is source policy
For write-once, read-many workloads, choosing the largest file as the source will always resolve fool-fool scenarios correctly. In other cases we fsync() the files and will have a reliable 'wise man'.

Change-Id: Ic4dbea8d06db6d578fbcb866fb65ee2d066ac7ba
BUG: 958118
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/5519
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/cluster/afr/src/afr-inode-write.c | 22
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 100
2 files changed, 93 insertions, 29 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index a744167..7f7d9b4 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -578,14 +578,11 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int read_child = 0;
int call_count = -1;
- int need_unwind = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
@@ -611,19 +608,11 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
@@ -788,14 +777,11 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
int child_index = (long) cookie;
int call_count = -1;
- int need_unwind = 0;
int read_child = 0;
local = frame->local;
- priv = this->private;
read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
@@ -821,19 +807,11 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
}
local->op_errno = op_errno;
}
UNLOCK (&frame->lock);
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
call_count = afr_frame_return (frame);
if (call_count == 0) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 929def2..e6a8092 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -489,6 +489,8 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
{
int i = 0;
int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
GF_ASSERT (witnesses);
GF_ASSERT (characters);
@@ -498,10 +500,21 @@ afr_find_biggest_witness_among_fools (int32_t *witnesses,
if (characters[i].type != AFR_NODE_FOOL)
continue;
- if (biggest_witness < witnesses[i])
+ if (biggest_witness < witnesses[i]) {
biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
- return biggest_witness;
+
+ if (biggest_witness_cnt != 1)
+ return -1;
+
+ return biggest_witness_idx;
}
int
@@ -529,10 +542,71 @@ afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
return nsources;
}
+
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+ if (idx >= 0 && idx < child_count) {
+ sources[idx] = 1;
+ return 1;
+ }
+ return 0;
+}
+
+
+static int
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_size = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_size > max_size) {
+ max_size = bufs[child].ia_size;
+ idx = child;
+ }
+ }
+
+ return idx;
+}
+
+
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_ctime = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_ctime > max_ctime) {
+ max_ctime = bufs[child].ia_ctime;
+ idx = child;
+ }
+ }
+
+ return idx;
+}
+
+
static int
afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
afr_node_character *characters,
- int child_count)
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
{
int32_t biggest_witness = 0;
int nsources = 0;
@@ -540,6 +614,11 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
GF_ASSERT (child_count > 0);
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
gf_afr_mt_int32_t);
if (NULL == witnesses) {
@@ -552,9 +631,15 @@ afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
characters,
child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
+
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
out:
GF_FREE (witnesses);
return nsources;
@@ -898,7 +983,8 @@ afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
nsources = afr_mark_biggest_of_fools_as_source (sources,
pending_matrix,
characters,
- child_count);
+ success_children,
+ child_count, bufs);
}
out: