From f7902b7072015c9c20f47e999af85c923f202d33 Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Tue, 14 Apr 2009 08:15:50 -0700 Subject: Changed xattr format of afr changelog to support adding and removing of subvolumes while keeping existing data. Signed-off-by: Anand V. Avati --- xlators/cluster/afr/src/afr-dir-write.c | 9 - xlators/cluster/afr/src/afr-inode-write.c | 11 -- xlators/cluster/afr/src/afr-self-heal-common.c | 239 ++++++++++------------- xlators/cluster/afr/src/afr-self-heal-common.h | 20 +- xlators/cluster/afr/src/afr-self-heal-data.c | 27 ++- xlators/cluster/afr/src/afr-self-heal-entry.c | 25 +-- xlators/cluster/afr/src/afr-self-heal-metadata.c | 35 ++-- xlators/cluster/afr/src/afr-transaction.c | 132 ++++++++----- xlators/cluster/afr/src/afr-transaction.h | 6 - xlators/cluster/afr/src/afr.c | 52 ++--- xlators/cluster/afr/src/afr.h | 53 ++++- 11 files changed, 322 insertions(+), 287 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 2a3d1548a..e357c7b17 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -297,7 +297,6 @@ afr_create (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -521,7 +520,6 @@ afr_mknod (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -745,7 +743,6 @@ afr_mkdir (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -973,7 +970,6 @@ afr_link (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (oldloc->path); local->transaction.new_basename = AFR_BASENAME (newloc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1199,7 +1195,6 @@ afr_symlink (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1396,7 +1391,6 @@ afr_rename (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (oldloc->path); local->transaction.new_basename = AFR_BASENAME (newloc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION); @@ -1583,7 +1577,6 @@ afr_unlink (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1770,7 +1763,6 @@ afr_rmdir (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.basename = AFR_BASENAME (loc->path); - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1915,7 +1907,6 @@ afr_setdents (call_frame_t *frame, xlator_t *this, local->transaction.done = afr_setdents_done; local->transaction.basename = NULL; - local->transaction.pending = AFR_ENTRY_PENDING; afr_transaction (frame, this, AFR_ENTRY_TRANSACTION); diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index e8d843b3c..8b568a0ea 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -226,7 +226,6 @@ afr_chmod (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -421,7 +420,6 @@ afr_fchmod (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -618,7 +616,6 @@ afr_chown (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -816,7 +813,6 @@ afr_fchown (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -1028,8 +1024,6 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, local->transaction.len = iov_length (vector, count); } - local->transaction.pending = AFR_DATA_PENDING; - afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); op_ret = 0; @@ -1226,7 +1220,6 @@ afr_truncate (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = offset; - local->transaction.pending = AFR_DATA_PENDING; afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); @@ -1425,7 +1418,6 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = offset; - local->transaction.pending = AFR_DATA_PENDING; afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); @@ -1626,7 +1618,6 @@ afr_utimens (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -1817,7 +1808,6 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -2007,7 +1997,6 @@ afr_removexattr (call_frame_t *frame, xlator_t *this, local->transaction.main_frame = frame; local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_METADATA_PENDING; afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 7c2e403c7..b74035be8 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -88,64 +88,6 @@ afr_sh_supress_errenous_children (int sources[], int child_errno[], } -int -afr_sh_supress_empty_children (int sources[], dict_t *xattr[], - struct stat *buf, - int child_count, const char *key) -{ - int i = 0; - int32_t *pending = NULL; - int ret = 0; - int all_xattr_missing = 1; - - /* if the file was created by afr with xattrs */ - for (i = 0; i < child_count; i++) { - if (!xattr[i]) - continue; - - ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); - if (ret != 0) { - continue; - } - - all_xattr_missing = 0; - break; - } - - if (all_xattr_missing) { - /* supress 0byte files.. this avoids empty file created - by dir selfheal to overwrite the 'good' file */ - for (i = 0; i < child_count; i++) { - if (!buf[i].st_size) - sources[i] = 0; - } - goto out; - } - - - for (i = 0; i < child_count; i++) { - if (!xattr[i]) { - sources[i] = 0; - continue; - } - - ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); - if (ret != 0) { - sources[i] = 0; - continue; - } - - if (!pending) { - sources[i] = 0; - continue; - } - } - -out: - return 0; -} - - void afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this) { @@ -175,11 +117,12 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this) void -afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], - int child_count, const char *key) +afr_sh_build_pending_matrix (afr_private_t *priv, + int32_t *pending_matrix[], dict_t *xattr[], + int child_count, afr_transaction_type type) { - int i = 0; - int j = 0; + int i, j, k; + int32_t *pending = NULL; int ret = -1; @@ -200,22 +143,25 @@ afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], pending = NULL; - ret = dict_get_ptr (xattr[i], (char *) key, - VOID(&pending)); - if (ret != 0) { - /* - * There is no xattr present. This means this - * subvolume should be considered an 'ignorant' - * subvolume. - */ + for (j = 0; j < child_count; j++) { + ret = dict_get_ptr (xattr[i], priv->pending_key[j], + VOID(&pending)); + + if (ret != 0) { + /* + * There is no xattr present. This means this + * subvolume should be considered an 'ignorant' + * subvolume. + */ + + ignorant_subvols[i] = 1; + continue; + } - ignorant_subvols[i] = 1; - continue; + k = afr_index_for_transaction_type (type); + + pending_matrix[i][j] = ntoh32 (pending[k]); } - - for (j = 0; j < child_count; j++) { - pending_matrix[i][j] = ntoh32 (pending[j]); - } } /* @@ -563,12 +509,13 @@ out: void -afr_sh_pending_to_delta (dict_t **xattr, char *key, +afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr, int32_t *delta_matrix[], int success[], - int child_count) + int child_count, afr_transaction_type type) { int i = 0; int j = 0; + int k = 0; int32_t * pending = NULL; int ret = 0; @@ -583,29 +530,34 @@ afr_sh_pending_to_delta (dict_t **xattr, char *key, for (i = 0; i < child_count; i++) { pending = NULL; - ret = dict_get_ptr (xattr[i], (char *) key, - VOID (&pending)); - - for (j = 0; j < child_count; j++) { - if (!success[j]) - continue; - + for (j = 0; j < child_count; j++) { + ret = dict_get_ptr (xattr[i], priv->pending_key[j], + VOID(&pending)); + + if (!success[j]) + continue; + + k = afr_index_for_transaction_type (type); + if (pending) { - delta_matrix[i][j] = -(ntoh32 (pending[j])); + delta_matrix[i][j] = -(ntoh32 (pending[k])); } else { - delta_matrix[i][j] = 0; + delta_matrix[i][j] = 0; } - } + + } } } int -afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], - int child_count, const char *key) +afr_sh_delta_to_xattr (afr_private_t *priv, + int32_t *delta_matrix[], dict_t *xattr[], + int child_count, afr_transaction_type type) { int i = 0; int j = 0; + int k = 0; int ret = 0; @@ -615,13 +567,18 @@ afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], if (!xattr[i]) continue; - pending = CALLOC (sizeof (int32_t), child_count); for (j = 0; j < child_count; j++) { - pending[j] = hton32 (delta_matrix[i][j]); - } + pending = CALLOC (sizeof (int32_t), 3); + /* 3 = data+metadata+entry */ + + k = afr_index_for_transaction_type (type); + + pending[k] = hton32 (delta_matrix[i][j]); - ret = dict_set_bin (xattr[i], (char *) key, pending, - child_count * sizeof (int32_t)); + ret = dict_set_bin (xattr[i], priv->pending_key[j], + pending, + 3 * sizeof (int32_t)); + } } return 0; @@ -637,21 +594,24 @@ afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this) int ret = -1; int i = 0; + int j = 0; priv = this->private; - ret = dict_get_ptr (xattr, AFR_METADATA_PENDING, &tmp_pending); + for (i = 0; i < priv->child_count; i++) { + ret = dict_get_ptr (xattr, priv->pending_key[i], + &tmp_pending); - if (ret != 0) - return 0; + if (ret != 0) + return 0; + + pending = tmp_pending; - pending = tmp_pending; - for (i = 0; i < priv->child_count; i++) { - if (i == child_count) - continue; - if (pending[i]) - return 1; - } + j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); + + if (pending[j]) + return 1; + } return 0; } @@ -664,23 +624,26 @@ afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this) int32_t *pending = NULL; void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ - int ret = -1; - int i = 0; + int ret = -1; + int i = 0; + int j = 0; priv = this->private; - ret = dict_get_ptr (xattr, AFR_DATA_PENDING, &tmp_pending); + for (i = 0; i < priv->child_count; i++) { + ret = dict_get_ptr (xattr, priv->pending_key[i], + &tmp_pending); - if (ret != 0) - return 0; + if (ret != 0) + return 0; + + pending = tmp_pending; - pending = tmp_pending; - for (i = 0; i < priv->child_count; i++) { - if (i == child_count) - continue; - if (pending[i]) - return 1; - } + j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + + if (pending[j]) + return 1; + } return 0; } @@ -689,33 +652,35 @@ afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this) int afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this) { - afr_private_t *priv = NULL; + afr_private_t *priv = NULL; int32_t *pending = NULL; void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ - - int ret = -1; - int i = 0; + + int ret = -1; + int i = 0; + int j = 0; priv = this->private; - ret = dict_get_ptr (xattr, AFR_ENTRY_PENDING, &tmp_pending); + for (i = 0; i < priv->child_count; i++) { + ret = dict_get_ptr (xattr, priv->pending_key[i], + &tmp_pending); - if (ret != 0) - return 0; + if (ret != 0) + return 0; + + pending = tmp_pending; - pending = tmp_pending; - for (i = 0; i < priv->child_count; i++) { - if (i == child_count) - continue; - if (pending[i]) - return 1; - } + j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION); + + if (pending[j]) + return 1; + } return 0; } - /** * is_matrix_zero - return true if pending matrix is all zeroes */ @@ -1229,9 +1194,13 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) xattr_req = dict_new(); - if (xattr_req) - ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, - priv->child_count * sizeof(int32_t)); + if (xattr_req) { + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, + priv->pending_key[i], + 3 * sizeof(int32_t)); + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 86f155b68..a311cdf5e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -41,29 +41,27 @@ int afr_sh_supress_errenous_children (int sources[], int child_errno[], int child_count); -int -afr_sh_supress_empty_children (int sources[], dict_t *xattr[], - struct stat *buf, - int child_count, const char *key); - void afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this); void -afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], - int child_count, const char *key); +afr_sh_build_pending_matrix (afr_private_t *priv, + int32_t *pending_matrix[], dict_t *xattr[], + int child_count, afr_transaction_type type); void -afr_sh_pending_to_delta (dict_t **xattr, char *key, int32_t *delta_matrix[], - int32_t success[], int child_count); +afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr, + int32_t *delta_matrix[], int success[], + int child_count, afr_transaction_type type); int afr_sh_mark_sources (afr_self_heal_t *sh, int child_count, afr_self_heal_type type); int -afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], - int child_count, const char *key); +afr_sh_delta_to_xattr (afr_private_t *priv, + int32_t *delta_matrix[], dict_t *xattr[], + int child_count, afr_transaction_type type); int afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 0f06e4558..a7a3d44f7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -301,10 +301,8 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - - afr_sh_pending_to_delta (sh->xattr, AFR_DATA_PENDING, - sh->delta_matrix, sh->success, - priv->child_count); + afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success, + priv->child_count, AFR_DATA_TRANSACTION); erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -317,8 +315,8 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) } } - afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, - priv->child_count, AFR_DATA_PENDING); + afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, + priv->child_count, AFR_DATA_TRANSACTION); local->call_count = call_count; for (i = 0; i < priv->child_count; i++) { @@ -773,18 +771,14 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, - priv->child_count, AFR_DATA_PENDING); + afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr, + priv->child_count, AFR_DATA_TRANSACTION); afr_sh_print_pending_matrix (sh->pending_matrix, this); - nsources = afr_sh_mark_sources (sh, priv->child_count, AFR_SELF_HEAL_DATA); - afr_sh_supress_empty_children (sh->sources, sh->xattr, sh->buf, - priv->child_count, AFR_DATA_PENDING); - afr_sh_supress_errenous_children (sh->sources, sh->child_errno, priv->child_count); @@ -900,9 +894,12 @@ afr_sh_data_lookup (call_frame_t *frame, xlator_t *this) local->call_count = call_count; xattr_req = dict_new(); - if (xattr_req) - ret = dict_set_uint64 (xattr_req, AFR_DATA_PENDING, - priv->child_count * sizeof(int32_t)); + if (xattr_req) { + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, priv->pending_key[i], + 3 * sizeof(int32_t)); + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 3906b707e..70edd5bab 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -217,10 +217,8 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - - afr_sh_pending_to_delta (sh->xattr, AFR_ENTRY_PENDING, - sh->delta_matrix, sh->success, - priv->child_count); + afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success, + priv->child_count, AFR_ENTRY_TRANSACTION); erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -233,8 +231,8 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) } } - afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, - priv->child_count, AFR_ENTRY_PENDING); + afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, + priv->child_count, AFR_ENTRY_TRANSACTION); local->call_count = call_count; for (i = 0; i < priv->child_count; i++) { @@ -1824,12 +1822,11 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, - priv->child_count, AFR_ENTRY_PENDING); + afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr, + priv->child_count, AFR_ENTRY_TRANSACTION); afr_sh_print_pending_matrix (sh->pending_matrix, this); - afr_sh_mark_sources (sh, priv->child_count, AFR_SELF_HEAL_ENTRY); @@ -1902,9 +1899,13 @@ afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) local->call_count = call_count; xattr_req = dict_new(); - if (xattr_req) - ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, - priv->child_count * sizeof(int32_t)); + if (xattr_req) { + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, + priv->pending_key[i], + 3 * sizeof(int32_t)); + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 6dce5d3af..8e832698f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -213,10 +213,9 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - - afr_sh_pending_to_delta (sh->xattr, AFR_METADATA_PENDING, - sh->delta_matrix, sh->success, - priv->child_count); + afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, + sh->success, priv->child_count, + AFR_METADATA_TRANSACTION); erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -229,8 +228,8 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this) } } - afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, - priv->child_count, AFR_METADATA_PENDING); + afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, + priv->child_count, AFR_METADATA_TRANSACTION); local->call_count = call_count; @@ -431,6 +430,8 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, afr_private_t *priv = NULL; int source = 0; + int i; + local = frame->local; sh = &local->self_heal; priv = this->private; @@ -445,9 +446,10 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, afr_sh_metadata_sync (frame, this, NULL); } else { - dict_del (xattr, AFR_DATA_PENDING); - dict_del (xattr, AFR_METADATA_PENDING); - dict_del (xattr, AFR_ENTRY_PENDING); + for (i = 0; i < priv->child_count; i++) { + dict_del (xattr, priv->pending_key[i]); + } + afr_sh_metadata_sync (frame, this, xattr); } @@ -515,8 +517,9 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, - priv->child_count, AFR_METADATA_PENDING); + afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr, + priv->child_count, + AFR_METADATA_TRANSACTION); afr_sh_print_pending_matrix (sh->pending_matrix, this); @@ -656,9 +659,13 @@ afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) xattr_req = dict_new(); - if (xattr_req) - ret = dict_set_uint64 (xattr_req, AFR_METADATA_PENDING, - priv->child_count * sizeof(int32_t)); + if (xattr_req) { + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (xattr_req, + priv->pending_key[i], + 3 * sizeof(int32_t)); + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ba6633282..7245ee1ac 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -27,19 +27,28 @@ static void -__mark_all_pending (int32_t *pending, int child_count) +__mark_all_pending (int32_t *pending[], int child_count, + afr_transaction_type type) { int i; - - for (i = 0; i < child_count; i++) - pending[i] = hton32 (1); + int j; + + for (i = 0; i < child_count; i++) { + j = afr_index_for_transaction_type (type); + pending[i][j] = hton32 (1); + } } static void -__mark_child_dead (int32_t *pending, int child_count, int child) +__mark_child_dead (int32_t *pending[], int child_count, int child, + afr_transaction_type type) { - pending[child] = 0; + int j; + + j = afr_index_for_transaction_type (type); + + pending[child][j] = 0; } @@ -66,14 +75,15 @@ out: static void -__mark_failed_children (int32_t *pending, int child_count, - xlator_t *this, fd_t *fd) +__mark_failed_children (int32_t *pending[], int child_count, + xlator_t *this, fd_t *fd, afr_transaction_type type) { uint64_t ctx; afr_fd_ctx_t * fd_ctx = NULL; int ret = 0; int i = 0; + int j = 0; ret = fd_ctx_get (fd, this, &ctx); @@ -83,8 +93,10 @@ __mark_failed_children (int32_t *pending, int child_count, fd_ctx = (afr_fd_ctx_t *)(long) ctx; for (i = 0; i < child_count; i++) { + j = afr_index_for_transaction_type (type); + if (fd_ctx->child_failed[i]) - pending[i] = 0; + pending[i][j] = 0; } out: @@ -93,23 +105,32 @@ out: static void -__mark_down_children (int32_t *pending, int child_count, unsigned char *child_up) +__mark_down_children (int32_t *pending[], int child_count, + unsigned char *child_up, afr_transaction_type type) { int i; - - for (i = 0; i < child_count; i++) + int j; + + for (i = 0; i < child_count; i++) { + j = afr_index_for_transaction_type (type); + if (!child_up[i]) - pending[i] = 0; + pending[i][j] = 0; + } } static void -__mark_all_success (int32_t *pending, int child_count) +__mark_all_success (int32_t *pending[], int child_count, + afr_transaction_type type) { int i; - - for (i = 0; i < child_count; i++) - pending[i] = hton32 (-1); + int j; + + for (i = 0; i < child_count; i++) { + j = afr_index_for_transaction_type (type); + pending[i][j] = hton32 (-1); + } } @@ -286,6 +307,26 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this) } +static int +afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending) +{ + int i; + int ret = 0; + + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_static_bin (xattr, priv->pending_key[i], + pending[i], 3 * sizeof (int32_t)); + /* 3 = data+metadata+entry */ + + if (ret < 0) + goto out; + } + +out: + return ret; +} + + static int afr_lock_server_count (afr_private_t *priv, afr_transaction_type type) { @@ -481,12 +522,13 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local = frame->local; - __mark_down_children (local->pending_array, priv->child_count, - local->child_up); + __mark_down_children (local->pending, priv->child_count, + local->child_up, local->transaction.type); if (local->op == GF_FOP_FLUSH) { - __mark_failed_children (local->pending_array, priv->child_count, - this, local->fd); + __mark_failed_children (local->pending, priv->child_count, + this, local->fd, + local->transaction.type); } call_count = afr_up_children_count (priv->child_count, local->child_up); @@ -504,11 +546,11 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) return 0; } - for (i = 0; i < priv->child_count; i++) { + for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { - ret = dict_set_static_bin (xattr, local->transaction.pending, - local->pending_array, - priv->child_count * sizeof (int32_t)); + ret = afr_set_pending_dict (priv, xattr, + local->pending); + if (ret < 0) gf_log (this->name, GF_LOG_ERROR, "failed to set pending entry"); @@ -553,9 +595,10 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) used the dict as placeholder for return value */ - ret = dict_set_static_bin (xattr, local->transaction.pending, - local->pending_array, - priv->child_count * sizeof (int32_t)); + + ret = afr_set_pending_dict (priv, xattr, + local->pending); + if (ret < 0) gf_log (this->name, GF_LOG_ERROR, "failed to set pending entry"); @@ -632,8 +675,8 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (local->op_errno == ENOTSUP)) { local->transaction.resume (frame, this); } else { - __mark_all_success (local->pending_array, - priv->child_count); + __mark_all_success (local->pending, priv->child_count, + local->transaction.type); local->transaction.fop (frame, this); } @@ -675,15 +718,14 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) local->call_count = call_count; - __mark_all_pending (local->pending_array, priv->child_count); + __mark_all_pending (local->pending, priv->child_count, + local->transaction.type); for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { - ret = dict_set_static_bin (xattr, - local->transaction.pending, - local->pending_array, - (priv->child_count * - sizeof (int32_t))); + ret = afr_set_pending_dict (priv, xattr, + local->pending); + if (ret < 0) gf_log (this->name, GF_LOG_ERROR, "failed to set pending entry"); @@ -735,9 +777,9 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) value */ - ret = dict_set_static_bin (xattr, local->transaction.pending, - local->pending_array, - priv->child_count * sizeof (int32_t)); + ret = afr_set_pending_dict (priv, xattr, + local->pending); + if (ret < 0) gf_log (this->name, GF_LOG_ERROR, "failed to set pending entry"); @@ -902,8 +944,8 @@ int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) if (__changelog_needed_pre_op (frame, this)) { afr_changelog_pre_op (frame, this); } else { - __mark_all_success (local->pending_array, - priv->child_count); + __mark_all_success (local->pending, priv->child_count, + local->transaction.type); local->transaction.fop (frame, this); } @@ -1054,8 +1096,8 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index __mark_fop_failed_on_fd (local->fd, this, child_index); break; default: - __mark_child_dead (local->pending_array, priv->child_count, - child_index); + __mark_child_dead (local->pending, priv->child_count, + child_index, local->transaction.type); break; } } @@ -1079,8 +1121,8 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) if (__changelog_needed_pre_op (frame, this)) { afr_changelog_pre_op (frame, this); } else { - __mark_all_success (local->pending_array, - priv->child_count); + __mark_all_success (local->pending, priv->child_count, + local->transaction.type); local->transaction.fop (frame, this); } diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 95e297739..c7a6490e7 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -20,12 +20,6 @@ #ifndef __TRANSACTION_H__ #define __TRANSACTION_H__ -#define AFR_METADATA_PENDING "trusted.glusterfs.afr.metadata-pending" - -#define AFR_DATA_PENDING "trusted.glusterfs.afr.data-pending" - -#define AFR_ENTRY_PENDING "trusted.glusterfs.afr.entry-pending" - void afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 215a80b21..270364ff9 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -212,13 +212,24 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) void afr_local_cleanup (afr_local_t *local, xlator_t *this) { + int i; + afr_private_t * priv = NULL; + if (!local) return; afr_local_sh_cleanup (local, this); FREE (local->child_errno); - FREE (local->pending_array); + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (local->pending && local->pending[i]) + FREE (local->pending[i]); + } + + FREE (local->pending); loc_wipe (&local->loc); loc_wipe (&local->newloc); @@ -665,28 +676,17 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. */ local->op_errno = ENOTCONN; - if ((xattr_req == NULL) - && (priv->metadata_self_heal - || priv->data_self_heal - || priv->entry_self_heal)) + if (xattr_req == NULL) local->xattr_req = dict_new (); else local->xattr_req = dict_ref (xattr_req); - if (priv->metadata_self_heal) { - ret = dict_set_uint64 (local->xattr_req, AFR_METADATA_PENDING, - priv->child_count * sizeof(int32_t)); - } - - if (priv->data_self_heal) { - ret = dict_set_uint64 (local->xattr_req, AFR_DATA_PENDING, - priv->child_count * sizeof(int32_t)); - } - - if (priv->entry_self_heal) { - ret = dict_set_uint64 (local->xattr_req, AFR_ENTRY_PENDING, - priv->child_count * sizeof(int32_t)); - } + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], + 3 * sizeof(int32_t)); + + /* 3 = data+metadata+entry */ + } ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0); @@ -1024,8 +1024,6 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) local->transaction.start = 0; local->transaction.len = 0; - local->transaction.pending = AFR_DATA_PENDING; - afr_transaction (frame, this, AFR_FLUSH_TRANSACTION); op_ret = 0; @@ -2362,7 +2360,6 @@ init (xlator_t *this) priv->entry_lock_server_count = lock_server_count; } - trav = this->children; while (trav) { if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) { @@ -2407,11 +2404,22 @@ init (xlator_t *this) goto out; } + priv->pending_key = CALLOC (sizeof (*priv->pending_key), child_count); + if (!priv->pending_key) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto out; + } + trav = this->children; i = 0; while (i < child_count) { priv->children[i] = trav->xlator; + asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + trav->xlator->name); + trav = trav->next; i++; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c15ed85f9..32484fd0a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -30,6 +30,7 @@ #include "call-stub.h" #include "compat-errno.h" +#define AFR_XATTR_PREFIX "trusted.afr" typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ @@ -42,11 +43,12 @@ typedef struct _afr_private { unsigned char *child_up; + char **pending_key; + gf_boolean_t data_self_heal; /* on/off */ gf_boolean_t metadata_self_heal; /* on/off */ gf_boolean_t entry_self_heal; /* on/off */ - gf_boolean_t data_change_log; /* on/off */ gf_boolean_t metadata_change_log; /* on/off */ gf_boolean_t entry_change_log; /* on/off */ @@ -103,6 +105,35 @@ typedef enum { AFR_FLUSH_TRANSACTION, /* flush */ } afr_transaction_type; + +/* + xattr format: trusted.afr.volume = [x y z] + x - data pending + y - metadata pending + z - entry pending +*/ + +static inline int +afr_index_for_transaction_type (afr_transaction_type type) +{ + switch (type) { + + case AFR_DATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + return 0; + + case AFR_METADATA_TRANSACTION: + return 1; + + case AFR_ENTRY_TRANSACTION: + case AFR_ENTRY_RENAME_TRANSACTION: + return 2; + } + + return -1; /* make gcc happy */ +} + + typedef struct _afr_local { unsigned int call_count; unsigned int success_count; @@ -118,7 +149,7 @@ typedef struct _afr_local { int32_t op_ret; int32_t op_errno; - int32_t *pending_array; + int32_t **pending; loc_t loc; loc_t newloc; @@ -365,8 +396,6 @@ typedef struct _afr_local { const char *basename; const char *new_basename; - char *pending; - loc_t parent_loc; loc_t new_parent_loc; @@ -521,18 +550,28 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) static inline int afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) { + int i; + local->child_errno = CALLOC (sizeof (*local->child_errno), priv->child_count); if (!local->child_errno) { return -ENOMEM; } - local->pending_array = CALLOC (sizeof (*local->pending_array), - priv->child_count); - if (!local->pending_array) { + local->pending = CALLOC (sizeof (*local->pending), + priv->child_count); + + if (!local->pending) { return -ENOMEM; } + for (i = 0; i < priv->child_count; i++) { + local->pending[i] = CALLOC (sizeof (*local->pending[i]), + 3); /* data + metadata + entry */ + if (!local->pending[i]) + return -ENOMEM; + } + local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes), priv->child_count); -- cgit