From 676b8793b9a77babf0275ee78eeacfa7291a8c35 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 27 Nov 2012 12:34:18 +0530 Subject: cluster/afr: mark new entry changelog for create/mknod failures Problem: When create/mknod fails on some of the nodes, appropriate pending data/metadata changelogs are not assigned. This was not considered to be an issue because entry self-heal would do the assigning of appropriate changelog after creating new entries. But using the combination of rebalance and remove brick we can construct a case where a file with same name and gfid can be created in a dir with different data and link-to xattr without any changelog. Fix: When a create/mknod failure is observed mark the appropriate changelog on the new file created. Change-Id: I4c32cbf5594a13fb14deaf97ff30b2fff11cbfd6 BUG: 858212 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/4207 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-common.c | 51 ++++++++++- xlators/cluster/afr/src/afr-dir-write.c | 127 +++++++++++++++++++++++++- xlators/cluster/afr/src/afr-self-heal-entry.c | 36 +------- xlators/cluster/afr/src/afr-transaction.c | 67 +++++++------- xlators/cluster/afr/src/afr.h | 14 +++ 5 files changed, 228 insertions(+), 67 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 46f492bc5..c38e52725 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -766,6 +766,13 @@ out: return; } +void +afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count) +{ + afr_reset_xattr (xattr, child_count); + GF_FREE (xattr); +} + void afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) { @@ -782,10 +789,7 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->inode) inode_unref (sh->inode); - if (sh->xattr) { - afr_reset_xattr (sh->xattr, priv->child_count); - GF_FREE (sh->xattr); - } + afr_xattr_array_destroy (sh->xattr, priv->child_count); GF_FREE (sh->child_errno); @@ -4233,3 +4237,42 @@ xlator_subvolume_count (xlator_t *this) i++; return i; } + +inline gf_boolean_t +afr_is_errno_set (int *child_errno, int child) +{ + return child_errno[child]; +} + +inline gf_boolean_t +afr_is_errno_unset (int *child_errno, int child) +{ + return !afr_is_errno_set (child_errno, child); +} + +void +afr_prepare_new_entry_pending_matrix (int32_t **pending, + gf_boolean_t (*is_pending) (int *, int), + int *ctx, struct iatt *buf, + unsigned int child_count) +{ + int midx = 0; + int idx = 0; + int i = 0; + + midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); + if (IA_ISDIR (buf->ia_type)) + idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION); + else if (IA_ISREG (buf->ia_type)) + idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + else + idx = -1; + for (i = 0; i < child_count; i++) { + if (is_pending (ctx, i)) { + pending[i][midx] = hton32 (1); + if (idx == -1) + continue; + pending[i][idx] = hton32 (1); + } + } +} diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 49cf8d453..d86b08b87 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -106,11 +106,134 @@ __dir_entry_fop_common_cbk (call_frame_t *frame, int child_index, local->fresh_children[local->success_count] = child_index; local->success_count++; + local->child_errno[child_index] = 0; + } else { + local->child_errno[child_index] = op_errno; } local->op_errno = op_errno; } +int +afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + dict_t *xattr, dict_t *xdata) +{ + int call_count = 0; + + call_count = afr_frame_return (frame); + if (call_count == 0) { + AFR_STACK_DESTROY (frame); + } + return 0; +} + +void +afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *new_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *new_local = NULL; + afr_private_t *priv = NULL; + dict_t **xattr = NULL; + int32_t **changelog = NULL; + int i = 0; + GF_UNUSED int op_errno = 0; + + local = frame->local; + priv = this->private; + + new_frame = copy_frame (frame); + if (!new_frame) { + goto out; + } + + AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out); + new_local = new_frame->local; + changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS); + if (!changelog) + goto out; + + xattr = GF_CALLOC (priv->child_count, sizeof (*xattr), + gf_afr_mt_dict_t); + if (!xattr) + goto out; + for (i = 0; i < priv->child_count; i++) { + if (local->child_errno[i]) + continue; + xattr[i] = dict_new (); + if (!xattr[i]) + goto out; + } + + afr_prepare_new_entry_pending_matrix (changelog, + afr_is_errno_set, + local->child_errno, + &local->cont.dir_fop.buf, + priv->child_count); + + new_local->pending = changelog; + uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid); + new_local->loc.inode = inode_ref (local->cont.dir_fop.inode); + new_local->call_count = local->success_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_errno[i]) + continue; + + afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST); + STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk, + (void *) (long) i, priv->children[i], + priv->children[i]->fops->xattrop, + &new_local->loc, GF_XATTROP_ADD_ARRAY, + xattr[i], NULL); + } + new_frame = NULL; +out: + if (new_frame) + AFR_STACK_DESTROY (new_frame); + afr_xattr_array_destroy (xattr, priv->child_count); + return; +} + +gf_boolean_t +afr_is_new_entry_changelog_needed (glusterfs_fop_t fop) +{ + glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL}; + int i = 0; + + for (i = 0; fops[i] != GF_FOP_NULL; i++) { + if (fop == fops[i]) + return _gf_true; + } + return _gf_false; +} + +void +afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + + if (local->op_ret < 0) + goto out; + + if (local->success_count == priv->child_count) + goto out; + + if (!afr_is_new_entry_changelog_needed (local->op)) + goto out; + + afr_mark_new_entry_changelog (frame, this); + +out: + local->transaction.resume (frame, this); +} + void afr_dir_fop_done (call_frame_t *frame, xlator_t *this) { @@ -129,7 +252,7 @@ afr_dir_fop_done (call_frame_t *frame, xlator_t *this) local->cont.dir_fop.buf.ia_gfid); done: local->transaction.unwind (frame, this); - local->transaction.resume (frame, this); + afr_dir_fop_mark_entry_pending_changelog (frame, this); } /* {{{ create */ @@ -331,6 +454,7 @@ afr_create (call_frame_t *frame, xlator_t *this, } UNLOCK (&priv->read_child_lock); + local->op = GF_FOP_CREATE; local->cont.create.flags = flags; local->cont.create.mode = mode; local->cont.create.fd = fd_ref (fd); @@ -524,6 +648,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } UNLOCK (&priv->read_child_lock); + local->op = GF_FOP_MKNOD; local->cont.mknod.mode = mode; local->cont.mknod.dev = dev; local->umask = umask; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 0c70d4b6e..c3c9f9fca 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1018,33 +1018,6 @@ out: return 0; } -void -afr_sh_prepare_new_entry_pending_matrix (int32_t **pending, - int *child_errno, - struct iatt *buf, - unsigned int child_count) -{ - int midx = 0; - int idx = 0; - int i = 0; - - midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); - if (IA_ISDIR (buf->ia_type)) - idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION); - else if (IA_ISREG (buf->ia_type)) - idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); - else - idx = -1; - for (i = 0; i < child_count; i++) { - if (child_errno[i]) - continue; - pending[i][midx] = hton32 (1); - if (idx == -1) - continue; - pending[i][idx] = hton32 (1); - } -} - int afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, xlator_t *this) @@ -1061,10 +1034,11 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, impunge_sh = &impunge_local->self_heal; active_src = impunge_sh->active_source; - afr_sh_prepare_new_entry_pending_matrix (impunge_local->pending, - impunge_sh->child_errno, - &impunge_sh->entrybuf, - priv->child_count); + afr_prepare_new_entry_pending_matrix (impunge_local->pending, + afr_is_errno_unset, + impunge_sh->child_errno, + &impunge_sh->entrybuf, + priv->child_count); xattr = dict_new (); if (!xattr) { op_errno = ENOMEM; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 3ba8d9b56..5e636d8dc 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -203,6 +203,40 @@ __mark_all_success (int32_t *pending[], int child_count, } } +void +_set_all_child_errno (int *child_errno, unsigned int child_count) +{ + int i = 0; + + for (i = 0; i < child_count; i++) + if (child_errno[i] == 0) + child_errno[i] = ENOTCONN; +} + +void +afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + priv = this->private; + __mark_all_success (local->pending, priv->child_count, + local->transaction.type); + + _set_all_child_errno (local->child_errno, priv->child_count); + + /* Perform fops with the lk-owner from top xlator. + * Eg: lk-owner of posix-lk and flush should be same, + * flush cant clear the posix-lks without that lk-owner. + */ + afr_save_lk_owner (frame); + frame->root->lk_owner = + local->transaction.main_frame->root->lk_owner; + + local->transaction.fop (frame, this); +} + static int __changelog_enabled (afr_private_t *priv, afr_transaction_type type) @@ -778,18 +812,7 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (local->op_errno == ENOTSUP)) { local->transaction.resume (frame, this); } else { - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); - - /* Perform fops with the lk-owner from top xlator. - * Eg: lk-owner of posix-lk and flush should be same, - * flush cant clear the posix-lks without that lk-owner. - */ - afr_save_lk_owner (frame); - frame->root->lk_owner = - local->transaction.main_frame->root->lk_owner; - - local->transaction.fop (frame, this); + afr_transaction_perform_fop (frame, this); } } @@ -1218,28 +1241,10 @@ afr_lock (call_frame_t *frame, xlator_t *this) int afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - local = frame->local; - if (__fop_changelog_needed (frame, this)) { afr_changelog_pre_op (frame, this); } else { - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); - - - /* Perform fops with the lk-owner from top xlator. - * Eg: lk-owner of posix-lk and flush should be same, - * flush cant clear the posix-lks without that lk-owner. - */ - afr_save_lk_owner (frame); - frame->root->lk_owner = - local->transaction.main_frame->root->lk_owner; - - local->transaction.fop (frame, this); + afr_transaction_perform_fop (frame, this); } return 0; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index d22daf51b..da9cc67c6 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1023,6 +1023,20 @@ afr_matrix_cleanup (int32_t **pending, unsigned int m); int32_t** afr_matrix_create (unsigned int m, unsigned int n); + +gf_boolean_t +afr_is_errno_set (int *child_errno, int child); + +gf_boolean_t +afr_is_errno_unset (int *child_errno, int child); + +void +afr_prepare_new_entry_pending_matrix (int32_t **pending, + gf_boolean_t (*is_pending) (int *, int), + int *ctx, struct iatt *buf, + unsigned int child_count); +void +afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count); /* * Special value indicating we should use the "auto" quorum method instead of * a fixed value (including zero to turn off quorum enforcement). -- cgit