summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
author: Anand Avati <avati@gluster.com> 2009-10-13 06:42:15 +0000
committer: Anand V. Avati <avati@dev.gluster.com> 2009-10-13 05:46:47 -0700
commit: f085beebd03b2c8be2fa57039ad3cbcb6eaa66d3 (patch)
tree: 509511c56cf3c9f96b59f959f29817c86841643a /xlators/cluster/afr
parent: b20cee457232a88517af44ae4505361dd3a4de15 (diff)
prevent spurious unlocks from afr selfheal
afr selfheal now remembers all the nodes on which locks were successfully held and sends unlocks only to those nodes.
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 112 (parallel deletion of files mounted by different clients on the same back-end hangs and/or does not completely delete)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=112
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 34
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 28
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 26
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 21
-rw-r--r-- xlators/cluster/afr/src/afr.c 3
-rw-r--r-- xlators/cluster/afr/src/afr.h 1
6 files changed, 90 insertions, 23 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 07d6a18b6..b42801ced 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -713,7 +713,11 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
-
+
+ for (i = 0; i < priv->child_count; i++) {
+ sh->locked_nodes[i] = 0;
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (sh->xattr[i])
dict_unref (sh->xattr[i]);
@@ -780,12 +784,20 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- call_count = local->child_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->locked_nodes[i])
+ call_count++;
+ }
+
+ if (call_count == 0) {
+ afr_sh_missing_entries_done (frame, this);
+ return 0;
+ }
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (sh->locked_nodes[i]) {
gf_log (this->name, GF_LOG_TRACE,
"unlocking %"PRId64"/%s on subvolume %s",
sh->parent_loc.inode->ino, local->loc.name,
@@ -1284,11 +1296,13 @@ sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
sh->op_failed = 1;
+ sh->locked_nodes[child_index] = 0;
gf_log (this->name, GF_LOG_DEBUG,
"locking inode of %s on child %d failed: %s",
local->loc.path, child_index,
strerror (op_errno));
} else {
+ sh->locked_nodes[child_index] = 1;
gf_log (this->name, GF_LOG_TRACE,
"inode of %s on child %d locked",
local->loc.path, child_index);
@@ -1337,12 +1351,13 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND (frame, sh_missing_entries_lk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ STACK_WIND_COOKIE (frame, sh_missing_entries_lk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ this->name,
+ &sh->parent_loc, local->loc.name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
if (!--call_count)
break;
}
@@ -1380,6 +1395,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this,
sh->success = CALLOC (priv->child_count, sizeof (int));
sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *));
sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count);
+ sh->locked_nodes = CALLOC (sizeof (*sh->locked_nodes), priv->child_count);
sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
for (i = 0; i < priv->child_count; i++) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 620ad1a2c..d2224ec92 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -56,6 +56,7 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -65,6 +66,14 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
TODO: cleanup sh->*
*/
+ if (sh->healing_fd) {
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+ }
+
+ for (i = 0; i < priv->child_count; i++)
+ sh->locked_nodes[i] = 0;
+
gf_log (this->name, GF_LOG_TRACE,
"self heal of %s completed",
local->loc.path);
@@ -96,8 +105,6 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
afr_sh_data_done (frame, this);
}
@@ -261,7 +268,15 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- call_count = local->child_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->locked_nodes[i])
+ call_count++;
+ }
+
+ if (call_count == 0) {
+ afr_sh_data_close (frame, this);
+ return 0;
+ }
local->call_count = call_count;
@@ -270,7 +285,7 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
flock.l_type = F_UNLCK;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (sh->locked_nodes[i]) {
gf_log (this->name, GF_LOG_TRACE,
"unlocking %s on subvolume %s",
local->loc.path, priv->children[i]->name);
@@ -861,12 +876,13 @@ afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
sh->op_failed = 1;
- gf_log (this->name,
- GF_LOG_DEBUG,
+ sh->locked_nodes[child_index] = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
"locking of %s on child %d failed: %s",
local->loc.path, child_index,
strerror (op_errno));
} else {
+ sh->locked_nodes[child_index] = 1;
gf_log (this->name, GF_LOG_TRACE,
"inode of %s on child %d locked",
local->loc.path, child_index);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index b44418dd6..5953fce15 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -56,6 +56,7 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -65,6 +66,14 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
TODO: cleanup sh->*
*/
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+
+ for (i = 0; i < priv->child_count; i++) {
+ sh->locked_nodes[i] = 0;
+ }
+
gf_log (this->name, GF_LOG_TRACE,
"self heal of %s completed",
local->loc.path);
@@ -107,9 +116,6 @@ afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
afr_sh_entry_done (frame, this);
}
@@ -132,12 +138,20 @@ afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- call_count = local->child_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->locked_nodes[i])
+ call_count++;
+ }
+
+ if (call_count == 0) {
+ afr_sh_entry_done (frame, this);
+ return 0;
+ }
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (sh->locked_nodes[i]) {
gf_log (this->name, GF_LOG_TRACE,
"unlocking %s on subvolume %s",
local->loc.path, priv->children[i]->name);
@@ -1977,11 +1991,13 @@ afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
sh->op_failed = 1;
+ sh->locked_nodes[child_index] = 0;
gf_log (this->name, GF_LOG_DEBUG,
"locking inode of %s on child %d failed: %s",
local->loc.path, child_index,
strerror (op_errno));
} else {
+ sh->locked_nodes[child_index] = 1;
gf_log (this->name, GF_LOG_TRACE,
"inode of %s on child %d locked",
local->loc.path, child_index);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 9e8e995b4..9842902e6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -64,7 +64,11 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
memset (sh->success, 0, sizeof (int) * priv->child_count);
-
+
+ for (i = 0; i < priv->child_count; i++) {
+ sh->locked_nodes[i] = 1;
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (sh->xattr[i])
dict_unref (sh->xattr[i]);
@@ -137,7 +141,16 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- call_count = local->child_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->locked_nodes[i])
+ call_count++;
+ }
+
+ if (call_count == 0) {
+ afr_sh_metadata_done (frame, this);
+ return 0;
+ }
+
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
@@ -145,7 +158,7 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
flock.l_len = 0;
flock.l_type = F_UNLCK;
- if (local->child_up[i]) {
+ if (sh->locked_nodes[i]) {
gf_log (this->name, GF_LOG_TRACE,
"unlocking %s on subvolume %s",
local->loc.path, priv->children[i]->name);
@@ -712,11 +725,13 @@ afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
sh->op_failed = 1;
+ sh->locked_nodes[child_index] = 0;
gf_log (this->name, GF_LOG_DEBUG,
"locking of %s on child %d failed: %s",
local->loc.path, child_index,
strerror (op_errno));
} else {
+ sh->locked_nodes[child_index] = 1;
gf_log (this->name, GF_LOG_TRACE,
"inode of %s on child %d locked",
local->loc.path, child_index);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 429ab97c5..eea030ad1 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -216,6 +216,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->success)
FREE (sh->success);
+ if (sh->locked_nodes)
+ FREE (sh->locked_nodes);
+
if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 61f630b54..4cbb1b19c 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -84,6 +84,7 @@ typedef struct {
int active_source;
int active_sinks;
int *success;
+ int *locked_nodes;
fd_t *healing_fd;
int op_failed;