summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c   | 245
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.h   |  12
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c     |  11
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c    |  24
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c |   2
-rw-r--r-- xlators/cluster/afr/src/afr.h                    |   5
6 files changed, 152 insertions, 147 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 44bced74cc6..0558fafaae5 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -180,6 +180,7 @@ afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count)
{
@@ -190,12 +191,6 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
int i = 0;
int j = 0;
int k = 0;
- unsigned char *ignorant_subvols = NULL;
-
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
afr_init_pending_matrix (pending_matrix, child_count);
@@ -213,7 +208,8 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
@@ -224,19 +220,14 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
}
}
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- child_count);
- GF_FREE (ignorant_subvols);
-out:
return ret;
}
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
AFR_NODE_WISE,
- AFR_NODE_INVALID = -1,
} afr_node_type;
typedef struct {
@@ -490,23 +481,18 @@ out:
int
afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *valid_children, int child_count,
- uint32_t uid)
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
{
int i = 0;
int nsources = 0;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (sources);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
+ if (-1 == success_children[i])
+ break;
- child = valid_children[i];
+ child = success_children[i];
if (uid == bufs[child].ia_uid) {
sources[child] = 1;
nsources++;
@@ -516,21 +502,17 @@ afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
}
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
- int child_count)
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
{
int i = 0;
int smallest = -1;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
- child = valid_children[i];
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
if ((smallest == -1) ||
(bufs[child].ia_uid < bufs[smallest].ia_uid)) {
smallest = child;
@@ -540,20 +522,20 @@ afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
}
static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children,
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
int child_count, int32_t *sources)
{
int nsources = 0;
int smallest = 0;
- smallest = afr_get_child_with_lowest_uid (bufs, valid_children,
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
child_count);
if (smallest < 0) {
nsources = -1;
goto out;
}
nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- valid_children, child_count,
+ success_children, child_count,
bufs[smallest].ia_uid);
out:
return nsources;
@@ -583,12 +565,10 @@ afr_get_character_str (afr_node_type type)
afr_node_type
afr_find_child_character_type (int32_t *pending_row, int32_t child,
- int32_t child_count, const char *xlator_name)
+ unsigned int child_count)
{
afr_node_type type = AFR_NODE_INVALID;
- GF_ASSERT (pending_row);
- GF_ASSERT (child_count > 0);
GF_ASSERT ((child >= 0) && (child < child_count));
if (afr_sh_is_innocent (pending_row, child_count))
@@ -597,44 +577,85 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,
type = AFR_NODE_FOOL;
else if (afr_sh_is_wise (pending_row, child, child_count))
type = AFR_NODE_WISE;
- else
- GF_ASSERT (0);
-
- gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s",
- child, afr_get_character_str (type));
return type;
}
int
afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type)
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
{
afr_private_t *priv = NULL;
afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
priv = this->private;
+ child_count = priv->child_count;
if (afr_get_children_count (success_children, priv->child_count) == 0)
goto out;
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
+ }
+
afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, type, priv->child_count);
+ ignorant_subvols, xattr, type,
+ priv->child_count);
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
sh_type = afr_self_heal_type_for_transaction (type);
if (AFR_SELF_HEAL_INVALID == sh_type)
goto out;
afr_sh_print_pending_matrix (pending_matrix, this);
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name);
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
out:
+ GF_FREE (ignorant_subvols);
return nsources;
}
+void
+afr_find_character_types (afr_node_character *characters,
+ int32_t **pending_matrix, int32_t *success_children,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+ int child = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ type = afr_find_child_character_type (pending_matrix[child],
+ child, child_count);
+ characters[child].type = type;
+ }
+}
+
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+ unsigned int child_count)
+{
+ int i = 0;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ sources[success_children[i]] = 1;
+ }
+}
/**
* mark_sources: Mark all 'source' nodes and return number of source
* nodes found
@@ -660,17 +681,18 @@ out:
*/
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name)
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
{
/* stores the 'characters' (innocent, fool, wise) of the nodes */
-
afr_node_character *characters = NULL;
- int i = 0;
- int nsources = -1;
- xlator_t *this = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ child_count = priv->child_count;
characters = GF_CALLOC (sizeof (afr_node_character),
child_count, gf_afr_mt_afr_node_character);
if (!characters)
@@ -679,26 +701,14 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
this = THIS;
/* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
+ memset (sources, 0, sizeof (*sources) * child_count);
nsources = 0;
- for (i = 0; i < child_count; i++) {
- characters[i].type =
- afr_find_child_character_type (pending_matrix[i], i,
- child_count,
- xlator_name);
- if (AFR_NODE_INVALID == characters[i].type)
- gf_log (xlator_name, GF_LOG_WARNING,
- "child %d had invalid xattrs", i);
- }
-
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- valid_children,
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
+ if (afr_sh_all_nodes_innocent (characters, child_count)) {
+ if (type == AFR_SELF_HEAL_METADATA)
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
child_count,
sources);
goto out;
@@ -708,17 +718,17 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
- gf_log (this->name, GF_LOG_INFO,
- "split-brain possible, no source detected");
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
-
} else {
nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
nsources = afr_mark_biggest_of_fools_as_source (sources,
pending_matrix,
characters,
@@ -726,14 +736,10 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
}
out:
- if (nsources == 0) {
- for (i = 0; i < child_count; i++) {
- if (valid_children[i] != -1)
- sources[valid_children[i]] = 1;
- }
- }
- if (characters)
- GF_FREE (characters);
+ if (nsources == 0)
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
+ GF_FREE (characters);
gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
return nsources;
@@ -744,45 +750,14 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
- int k = 0;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
-
- for (i = 0; i < child_count; i++) {
- if (pending_raw)
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- &pending_raw);
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Unable to get dict value.");
- if (!success[j])
- continue;
-
- k = afr_index_for_transaction_type (type);
-
- if (pending_raw != NULL) {
- memcpy (pending, pending_raw, sizeof(pending));
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
- }
+ int i = 0;
+ int j = 0;
- }
- }
+ afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
+ xattr, type, priv->child_count);
+ for (i = 0; i < priv->child_count; i++)
+ for (j = 0; j < priv->child_count; j++)
+ delta_matrix[i][j] = -delta_matrix[i][j];
}
@@ -1262,7 +1237,8 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- afr_transaction_type_get (ia_type));
+ afr_transaction_type_get (ia_type),
+ NULL, _gf_false);
if (nsources < 0) {
gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
" in missing entry self-heal, continuing with the rest"
@@ -1699,6 +1675,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
int enoent_count = 0;
int nsources = 0;
int source = -1;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
@@ -1728,11 +1705,19 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->success_children,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 77c3375cc6a..42730a852e7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -48,6 +48,7 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count);
@@ -57,9 +58,9 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
int child_count, afr_transaction_type type);
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
afr_sh_delta_to_xattr (afr_private_t *priv,
@@ -77,9 +78,10 @@ afr_self_heal_type
afr_self_heal_type_for_transaction (afr_transaction_type type);
int
-afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type);
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
void
afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 431cef492c7..83920c081b7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -662,7 +662,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
frame->root->lk_owner);
nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
+ AFR_DATA_TRANSACTION, NULL, _gf_false);
if (nsources == 0) {
gf_log (this->name, GF_LOG_DEBUG,
"No self-heal needed for %s",
@@ -806,6 +806,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
int32_t nsources = 0;
int32_t prev_read_child = -1;
int32_t config_read_child = -1;
+ int32_t subvol_status = 0;
priv = this->private;
bufs = local->cont.lookup.bufs;
@@ -819,7 +820,11 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
memset (sources, 0, sizeof (*sources) * priv->child_count);
nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type);
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ gf_log (this->name, GF_LOG_WARNING, "%s: Possible split-brain",
+ local->loc.path);
if (nsources < 0)
goto out;
@@ -991,7 +996,7 @@ afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
(void) afr_build_sources (this, sh->xattr, NULL,
sh->pending_matrix,
sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION);
+ AFR_DATA_TRANSACTION, NULL, _gf_false);
ret = afr_sh_inode_set_read_ctx (sh, this);
if (ret)
afr_sh_data_fail (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 3359029c3ac..ba29656e2cd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1598,7 +1598,7 @@ afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
GF_ASSERT (sources);
success_children = impunge_sh->success_children;
- if (sources[child] || (child == impunge_sh->active_source)) {
+ if (child == impunge_sh->active_source) {
GF_ASSERT (afr_is_child_present (success_children,
child_count, child));
goto out;
@@ -2115,8 +2115,8 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int source = 0;
-
- int nsources = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
@@ -2137,23 +2137,31 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_ENTRY_TRANSACTION);
- if (nsources == 0) {
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
sh->source = source;
afr_reset_children (sh->fresh_children, priv->child_count);
afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
+ sh->fresh_children, priv->child_count);
if (sh->source >= 0)
afr_inode_set_read_ctx (this, sh->inode, sh->source,
sh->fresh_children);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 072ed74f8bf..992e9d88c3f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -478,7 +478,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->success_children,
- AFR_METADATA_TRANSACTION);
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 4aea44c4275..37a13d5de2a 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -58,6 +58,11 @@ typedef enum {
} afr_child_pos_t;
typedef enum {
+ SPLIT_BRAIN = 1,
+ ALL_FOOLS = 2
+} afr_subvol_status_t;
+
+typedef enum {
AFR_INODE_SET_READ_CTX = 1,
AFR_INODE_RM_STALE_CHILDREN,
AFR_INODE_SET_OPENDIR_DONE,