summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishir gowda <sgowda@redhat.com>2012-12-12 15:03:02 +0530
committerVijay Bellur <vbellur@redhat.com>2012-12-17 12:34:27 -0500
commit72237352cca4542c0d3b6f6a9276c962b33780d9 (patch)
treedcf91cd0e9cd8e29120a3cd86fe6fab267e475aa
parente5411f154f5f815a9b9d9aec647ee72cc9662541 (diff)
cluster/dht: fail fix-layout if any of the subvol is down
If any subvolume is down and a layout is rewritten such that hash values change, entry names in the downed subvol can be reused in the other subvol which got the same hash range. When the downed subvol is brought back up, duplicate entries might appear. Also separated handling of ENOSPC and ENOTCONN errors. Change-Id: I1a49a689f6891a32128adcfb92dc46f39eaddec7 BUG: 860599 Signed-off-by: shishir gowda <sgowda@redhat.com> Reviewed-on: https://code.engineering.redhat.com/gerrit/1898 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-common.c22
-rw-r--r--xlators/cluster/dht/src/dht-common.h3
-rw-r--r--xlators/cluster/dht/src/dht-layout.c11
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c3
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c43
5 files changed, 47 insertions, 35 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ade05f38d3c..1f4f234afe5 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2336,9 +2336,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_INFO,
"fixing the layout of %s", loc->path);
- dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
tmp = dict_get (xattr, "distribute.directory-spread-count");
@@ -2350,10 +2354,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
(dir_spread > 0))) {
layout->spread_cnt = dir_spread;
- dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- return 0;
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
}
gf_log (this->name, GF_LOG_ERROR,
"wrong 'directory-spread-count' value (%s)", value);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index d244921279d..d03c0e3e019 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -121,7 +121,6 @@ struct dht_local {
struct {
uint32_t hole_cnt;
uint32_t overlaps_cnt;
- uint32_t missing;
uint32_t down;
uint32_t misc;
dht_selfheal_dir_cbk_t dir_cbk;
@@ -340,7 +339,7 @@ int dht_layout_normalize (xlator_t *this, l
int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
+ uint32_t *misc_p, uint32_t *no_space_p);
int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, loc_t *loc, dict_t *xattr);
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 08697d0421e..38afa892c07 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -481,7 +481,8 @@ dht_layout_sort_volname (dht_layout_t *layout)
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
uint32_t overlaps = 0;
uint32_t missing = 0;
@@ -494,6 +495,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
uint32_t prev_stop = 0;
uint32_t last_stop = 0;
char is_virgin = 1;
+ uint32_t no_space = 0;
/* TODO: explain what is happening */
@@ -511,7 +513,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
down++;
break;
case ENOSPC:
- down++;
+ no_space++;
break;
default:
misc++;
@@ -550,6 +552,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
if (misc_p)
*misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
+
return ret;
}
@@ -574,7 +579,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_anomalies (this, loc, layout,
&holes, &overlaps,
- &missing, &down, &misc);
+ &missing, &down, &misc, NULL);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
"error while finding anomalies in %s -- not good news",
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index e96db2971e1..30febceb6af 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1364,6 +1364,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
"failed for %s", entry_loc.path);
defrag->defrag_status =
GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
goto out;
}
ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
@@ -1372,6 +1373,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Fix layout "
"failed for %s", entry_loc.path);
+ defrag->total_failures++;
goto out;
}
@@ -1455,6 +1457,7 @@ gf_defrag_start_crawl (void *data)
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
loc.path);
+ defrag->total_failures++;
goto out;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 68d9416fabf..e67b64c3826 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -571,6 +571,8 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout = NULL;
dht_conf_t *priv = NULL;
dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
this = frame->this;
priv = this->private;
@@ -592,6 +594,17 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
if (!new_layout)
goto done;
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
+
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
+ }
+
for (i = 0; i < new_layout->cnt; i++) {
/* TODO: fix this in layout_alloc() itself */
new_layout->list[i].err = -ENOENT;
@@ -758,35 +771,17 @@ int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
+ uint32_t holes = 0;
int ret = -1;
int i = -1;
- int overlaps = -1;
+ uint32_t overlaps = 0;
- this = frame->this;
- conf = this->private;
local = frame->local;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
holes = local->selfheal.hole_cnt;
overlaps = local->selfheal.overlaps_cnt;
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
-
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
-
if (holes || overlaps) {
dht_selfheal_layout_new_directory (frame, loc, layout);
ret = 0;
@@ -838,6 +833,9 @@ dht_fix_directory_layout (call_frame_t *frame,
/* No layout sorting required here */
tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
return 0;
@@ -860,9 +858,8 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
dht_layout_anomalies (this, loc, layout,
&local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
down = local->selfheal.down;
misc = local->selfheal.misc;