From 443e48abf9b373bb1a8c56d016aad3b974554b80 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 20 Aug 2014 21:50:06 +0530 Subject: cluster/afr: Add afr-v1 xattr compatibility This patch handles all the special cases that v1 handles, as well as the self-accusing pending changelog from the v1 pre-op. Change-Id: Ie10f71633fb20276f01ecafbd728f20483e7029c BUG: 1128721 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/8536 Reviewed-by: Ravishankar N Tested-by: Gluster Build System --- xlators/cluster/afr/src/afr-self-heal-common.c | 139 ++++++++++++----- xlators/cluster/afr/src/afr-self-heal-data.c | 187 ++++++++++++++++++++--- xlators/cluster/afr/src/afr-self-heal-entry.c | 21 ++- xlators/cluster/afr/src/afr-self-heal-metadata.c | 31 +++- xlators/cluster/afr/src/afr-self-heal-name.c | 20 ++- xlators/cluster/afr/src/afr-self-heal.h | 15 +- 6 files changed, 330 insertions(+), 83 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index ddccc7f38ed..14a514beffa 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -287,7 +287,36 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, return 0; } +void +afr_mark_active_sinks (xlator_t *this, unsigned char *sources, + unsigned char *locked_on, unsigned char *sinks) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + memset (sinks, 0, sizeof (*sinks) * priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (!sources[i] && locked_on[i]) + sinks[i] = 1; + } +} +gf_boolean_t +afr_does_witness_exist (xlator_t *this, uint64_t *witness) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (witness[i]) + return _gf_true; + } + return _gf_false; +} /* * This function determines if a self-heal is required for a given inode, @@ 
-309,22 +338,29 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, */ int -afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, - afr_transaction_type type, unsigned char *locked_on, - unsigned char *sources, unsigned char *sinks) +afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + afr_transaction_type type, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, uint64_t *witness) { - afr_private_t *priv = NULL; - int i = 0; - int j = 0; - int *dirty = NULL; - int **matrix = NULL; - char *accused = NULL; + afr_private_t *priv = NULL; + int i = 0; + int j = 0; + int *dirty = NULL; /* Denotes if dirty xattr is set */ + int **matrix = NULL;/* Changelog matrix */ + char *accused = NULL;/* Accused others without any self-accusal */ + char *pending = NULL;/* Have pending operations on others */ + char *self_accused = NULL; /* Accused itself */ priv = this->private; dirty = alloca0 (priv->child_count * sizeof (int)); accused = alloca0 (priv->child_count); + pending = alloca0 (priv->child_count); + self_accused = alloca0 (priv->child_count); matrix = ALLOC_MATRIX(priv->child_count, int); + memset (witness, 0, sizeof (*witness) * priv->child_count); if (afr_success_count (replies, priv->child_count) < AFR_SH_MIN_PARTICIPANTS) { @@ -335,11 +371,23 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, /* First construct the pending matrix for further analysis */ afr_selfheal_extract_xattr (this, replies, type, dirty, matrix); + /* short list all self-accused */ + for (i = 0; i < priv->child_count; i++) { + if (matrix[i][i]) + self_accused[i] = 1; + } + /* Next short list all accused to exclude them from being sources */ + /* Self-accused can't accuse others as they are FOOLs */ for (i = 0; i < priv->child_count; i++) { for (j = 0; j < priv->child_count; j++) { - if (matrix[i][j]) - accused[j] = 1; + if (matrix[i][j]) { + if (!self_accused[i]) + 
accused[j] = 1; + + if (i != j) + pending[i] = 1; + } } } @@ -350,38 +398,47 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, sources[i] = 1; } - /* Everyone accused by sources are sinks */ - memset (sinks, 0, priv->child_count); - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - for (j = 0; j < priv->child_count; j++) { - if (matrix[i][j]) - sinks[j] = 1; - } - } + /* Everyone accused by non-self-accused sources are sinks */ + memset (sinks, 0, priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (self_accused[i]) + continue; + for (j = 0; j < priv->child_count; j++) { + if (matrix[i][j]) + sinks[j] = 1; + } + } - /* If any source has 'dirty' bit, pick first - 'dirty' source and make everybody else sinks */ - for (i = 0; i < priv->child_count; i++) { - if (sources[i] && dirty[i]) { - for (j = 0; j < priv->child_count; j++) { - if (j != i) { - sources[j] = 0; - sinks[j] = 1; - } - } - break; - } - } + /* For breaking ties, provide the number of fops each child witnessed */ - /* If no sources, all locked nodes are sinks - split brain */ - if (AFR_COUNT (sources, priv->child_count) == 0) { - for (i = 0; i < priv->child_count; i++) { - if (locked_on[i]) - sinks[i] = 1; - } - } + /* + * count the pending fops witnessed from itself to others when it is + * self-accused + */ + for (i = 0; i < priv->child_count; i++) { + if (!self_accused[i]) + continue; + for (j = 0; j < priv->child_count; j++) { + if (i == j) + continue; + witness[i] += matrix[i][j]; + } + } + + /* In afr-v1 if a file is self-accused but didn't have any pending + * operations on others then it is similar to 'dirty' in afr-v2. + * Consider such cases as witness. 
+ */ + for (i = 0; i < priv->child_count; i++) { + if (self_accused[i] && !pending[i]) + witness[i] += matrix[i][i]; + } + + /* count the number of dirty fops witnessed */ + for (i = 0; i < priv->child_count; i++) + witness[i] += dirty[i]; return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 74088f4bf6d..f7503faa719 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -372,21 +372,160 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this, return 0; } +gf_boolean_t +afr_has_source_witnesses (xlator_t *this, unsigned char *sources, + uint64_t *witness) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (sources[i] && witness[i]) + return _gf_true; + } + return _gf_false; +} + +static gf_boolean_t +afr_does_size_mismatch (xlator_t *this, unsigned char *sources, + struct afr_reply *replies) +{ + int i = 0; + afr_private_t *priv = NULL; + struct iatt *min = NULL; + struct iatt *max = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + + if (replies[i].op_ret < 0) + continue; + + if (!min) + min = &replies[i].poststat; + + if (!max) + max = &replies[i].poststat; + + if (min->ia_size > replies[i].poststat.ia_size) + min = &replies[i].poststat; + + if (max->ia_size < replies[i].poststat.ia_size) + max = &replies[i].poststat; + } + + if (min && max) { + if (min->ia_size != max->ia_size) + return _gf_true; + } + + return _gf_false; +} /* * If by chance there are multiple sources with differing sizes, select * the largest file as the source. * - * This can only happen if data was directly modified in the backend. 
+ * This can happen if data was directly modified in the backend or for snapshots */ + +static void +afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources, + struct afr_reply *replies) +{ + int i = 0; + afr_private_t *priv = NULL; + uint64_t size = 0; + + /* Find source with biggest file size */ + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (size <= replies[i].poststat.ia_size) { + size = replies[i].poststat.ia_size; + } + } + + /* Mark sources with less size as not source */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (size > replies[i].poststat.ia_size) + sources[i] = 0; + } + + return; +} + +static void +afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources, + uint64_t *witness) +{ + int i = 0; + afr_private_t *priv = NULL; + uint64_t biggest_witness = 0; + + priv = this->private; + /* Find source with biggest witness count */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (biggest_witness < witness[i]) + biggest_witness = witness[i]; + } + + /* Mark files with less witness count as not source */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + if (witness[i] < biggest_witness) + sources[i] = 0; + } + + return; +} + +/* This is a tie breaker function. 
Only one source is assigned here */ +static void +afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources, + struct afr_reply *replies) +{ + int i = 0; + afr_private_t *priv = NULL; + int source = -1; + uint32_t max_ctime = 0; + + priv = this->private; + /* Find source with latest ctime */ + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + + if (max_ctime <= replies[i].poststat.ia_ctime) { + source = i; + max_ctime = replies[i].poststat.ia_ctime; + } + } + + /* Only mark one of the files as source to break ties */ + memset (sources, 0, sizeof (*sources) * priv->child_count); + sources[source] = 1; +} + static int __afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources, unsigned char *healed_sinks, unsigned char *locked_on, - struct afr_reply *replies) + struct afr_reply *replies, + uint64_t *witness) { int i = 0; afr_private_t *priv = NULL; - uint64_t size = 0; int source = -1; int sources_count = 0; @@ -400,24 +539,24 @@ __afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources, return -EIO; } - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (size <= replies[i].poststat.ia_size) { - size = replies[i].poststat.ia_size; - source = i; - } - } + /* If there are no witnesses/size-mismatches on sources we are done*/ + if (!afr_does_size_mismatch (this, sources, replies) && + !afr_has_source_witnesses (this, sources, witness)) + goto out; - for (i = 0; i < priv->child_count; i++) { - if (!sources[i]) - continue; - if (replies[i].poststat.ia_size < size) { - sources[i] = 0; - healed_sinks[i] = 1; - } - } + afr_mark_largest_file_as_source (this, sources, replies); + afr_mark_biggest_witness_as_source (this, sources, witness); + afr_mark_newest_file_as_source (this, sources, replies); + +out: + afr_mark_active_sinks (this, sources, locked_on, healed_sinks); + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) { + source = i; + break; + } + } return source; 
} @@ -439,6 +578,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = -1; int source = -1; afr_private_t *priv = NULL; + uint64_t *witness = NULL; priv = this->private; @@ -447,15 +587,16 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) return ret; - ret = afr_selfheal_find_direction (this, replies, AFR_DATA_TRANSACTION, - locked_on, sources, sinks); + witness = alloca0(priv->child_count * sizeof (*witness)); + ret = afr_selfheal_find_direction (frame, this, replies, + AFR_DATA_TRANSACTION, + locked_on, sources, sinks, witness); if (ret) return ret; /* Initialize the healed_sinks[] array optimistically to the intersection of to-be-healed (i.e sinks[]) and the list of servers which are up (i.e locked_on[]). - As we encounter failures in the healing process, we will unmark the respective servers in the healed_sinks[] array. @@ -464,7 +605,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, source = __afr_selfheal_data_finalize_source (this, sources, healed_sinks, locked_on, - replies); + replies, witness); if (source < 0) return -EIO; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index df6dfaaf396..3ea30a6a9d0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -326,7 +326,9 @@ __afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, static int __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources, unsigned char *healed_sinks, - unsigned char *locked_on) + unsigned char *locked_on, + struct afr_reply *replies, + uint64_t *witness) { int i = 0; afr_private_t *priv = NULL; @@ -338,7 +340,10 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources, sources_count = AFR_COUNT (sources, priv->child_count); if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) - || 
!sources_count) { + || !sources_count || afr_does_witness_exist (this, witness)) { + + memset (sources, 0, sizeof (*sources) * priv->child_count); + afr_mark_active_sinks (this, sources, locked_on, healed_sinks); return -1; } @@ -362,6 +367,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = -1; int source = -1; afr_private_t *priv = NULL; + uint64_t *witness = NULL; priv = this->private; @@ -370,8 +376,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) return ret; - ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION, - locked_on, sources, sinks); + witness = alloca0 (sizeof (*witness) * priv->child_count); + ret = afr_selfheal_find_direction (frame, this, replies, + AFR_ENTRY_TRANSACTION, + locked_on, sources, sinks, witness); if (ret) return ret; @@ -386,7 +394,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); source = __afr_selfheal_entry_finalize_source (this, sources, - healed_sinks, locked_on); + healed_sinks, + locked_on, replies, + witness); + if (source < 0) { /* If source is < 0 (typically split-brain), we perform a conservative merge of entries rather than erroring out */ diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index b4714fe9e05..96b3262e471 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -169,7 +169,6 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, return source; } - static int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, @@ -179,6 +178,8 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i int ret = -1; int source = -1; afr_private_t *priv = NULL; + int i = 0; 
+ uint64_t *witness = NULL; priv = this->private; @@ -187,9 +188,10 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i if (ret) return ret; - ret = afr_selfheal_find_direction (this, replies, - AFR_METADATA_TRANSACTION, - locked_on, sources, sinks); + witness = alloca0 (sizeof (*witness) * priv->child_count); + ret = afr_selfheal_find_direction (frame, this, replies, + AFR_METADATA_TRANSACTION, + locked_on, sources, sinks, witness); if (ret) return ret; @@ -203,9 +205,28 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i */ AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); + /* If any source has witness, pick first + * witness source and make everybody else sinks */ + for (i = 0; i < priv->child_count; i++) { + if (sources[i] && witness[i]) { + source = i; + break; + } + } + + if (source != -1) { + for (i = 0; i < priv->child_count; i++) { + if (i != source && sources[i]) { + sources[i] = 0; + healed_sinks[i] = 1; + } + } + } + source = __afr_selfheal_metadata_finalize_source (frame, this, sources, healed_sinks, - locked_on, replies); + locked_on, replies); + if (source < 0) return -EIO; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index c5d126185c7..af635f06d52 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -457,7 +457,9 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, int __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources, unsigned char *healed_sinks, - unsigned char *locked_on) + unsigned char *locked_on, + struct afr_reply *replies, + uint64_t *witness) { int i = 0; afr_private_t *priv = NULL; @@ -469,7 +471,9 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources, sources_count = AFR_COUNT (sources, priv->child_count); if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) 
== 0) - || !sources_count) { + || !sources_count || afr_does_witness_exist (this, witness)) { + memset (sources, 0, sizeof (*sources) * priv->child_count); + afr_mark_active_sinks (this, sources, locked_on, healed_sinks); return -1; } @@ -483,7 +487,6 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources, return source; } - int __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent, uuid_t pargfid, unsigned char *locked_on, @@ -494,6 +497,7 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren int source = -1; afr_private_t *priv = NULL; struct afr_reply *replies = NULL; + uint64_t *witness = NULL; priv = this->private; @@ -503,8 +507,10 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren if (ret) goto out; - ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION, - locked_on, sources, sinks); + witness = alloca0 (sizeof (*witness) * priv->child_count); + ret = afr_selfheal_find_direction (frame, this, replies, + AFR_ENTRY_TRANSACTION, + locked_on, sources, sinks, witness); if (ret) goto out; @@ -519,7 +525,9 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); source = __afr_selfheal_name_finalize_source (this, sources, - healed_sinks, locked_on); + healed_sinks, + locked_on, replies, + witness); if (source < 0) { /* If source is < 0 (typically split-brain), we perform a conservative merge of entries rather than erroring out */ diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index c32ec120a50..f208e6bc813 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -136,9 +136,11 @@ afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent, unsigned char *lookup_on, dict_t *xattr); int -afr_selfheal_find_direction (xlator_t *this, struct 
afr_reply *replies, - afr_transaction_type type, unsigned char *locked_on, - unsigned char *sources, unsigned char *sinks); +afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + afr_transaction_type type, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, uint64_t *witness); int afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, @@ -189,4 +191,11 @@ afr_success_count (struct afr_reply *replies, unsigned int count); void afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, int source, unsigned char *healed_sinks); + +void +afr_mark_active_sinks (xlator_t *this, unsigned char *sources, + unsigned char *locked_on, unsigned char *sinks); + +gf_boolean_t +afr_does_witness_exist (xlator_t *this, uint64_t *witness); #endif /* !_AFR_SELFHEAL_H */ -- cgit