diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-entry.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 229 |
1 files changed, 94 insertions, 135 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index b57521b9f..53491a1d7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #include <libgen.h> @@ -166,73 +157,20 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int i = 0; - dict_t **erase_xattr = NULL; - int need_unwind = 0; local = frame->local; sh = &local->self_heal; - priv = this->private; if (sh->entries_skipped) { - need_unwind = 1; - sh->op_failed = _gf_true; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); goto out; } - afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success, - priv->child_count, AFR_ENTRY_TRANSACTION); - - erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count, - gf_afr_mt_dict_t); - - for (i = 0; i < priv->child_count; i++) { - if (sh->xattr[i]) { - call_count++; - - erase_xattr[i] = get_new_dict(); - dict_ref (erase_xattr[i]); - } - } - - if (call_count == 0) - need_unwind = 1; - - afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, - priv->child_count, AFR_ENTRY_TRANSACTION); - - local->call_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (!erase_xattr[i]) - continue; - - gf_log (this->name, GF_LOG_TRACE, - "erasing pending flags from %s on %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY, erase_xattr[i], - NULL); - if (!--call_count) - break; - } - - for (i = 0; i < priv->child_count; i++) { - if (erase_xattr[i]) { - dict_unref (erase_xattr[i]); - } - } - GF_FREE (erase_xattr); - + afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION, + afr_sh_entry_erase_pending_cbk, + afr_sh_entry_finish); + return 0; out: - if (need_unwind) - afr_sh_entry_finish (frame, this); - + afr_sh_entry_finish (frame, this); return 0; } @@ -642,7 +580,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie, if (need_expunge) { gf_log (this->name, GF_LOG_INFO, - "missing entry %s on %s", + "Entry %s is missing on %s and deleting from " + "replica's other bricks", expunge_local->loc.path, priv->children[source]->name); @@ -674,6 +613,19 @@ out: return 0; } +static gf_boolean_t +can_skip_entry_self_heal (char *name, loc_t *parent_loc) +{ + if (strcmp (name, ".") == 0) { + return _gf_true; + } else if (strcmp (name, "..") == 0) { + return _gf_true; + } else if (loc_is_root (parent_loc) && + (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) { + return _gf_true; + } + return _gf_false; +} int afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, @@ -701,15 +653,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, sh->expunge_done = afr_sh_entry_expunge_entry_done; name = entry->d_name; - - if ((strcmp (name, ".") == 0) - || (strcmp (name, "..") == 0) - || ((strcmp (local->loc.path, "/") == 0) - && (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0))) { - - gf_log (this->name, GF_LOG_TRACE, - "skipping inspection of %s under %s", - name, local->loc.path); + if (can_skip_entry_self_heal (name, &local->loc)) { op_ret = 0; goto out; } @@ -862,7 +806,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this) active_src = next_active_sink (frame, this, sh->active_source); sh->active_source = active_src; - if (sh->op_failed) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { goto out; } @@ -1081,33 +1025,6 @@ out: return 0; } -void -afr_sh_prepare_new_entry_pending_matrix (int32_t **pending, - int *child_errno, - struct iatt *buf, - unsigned int child_count) -{ - int midx = 0; - int idx = 0; - int i = 0; - - midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); - if (IA_ISDIR (buf->ia_type)) - idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION); - else if (IA_ISREG (buf->ia_type)) - idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); - else - idx = -1; - for (i = 0; i < child_count; i++) { - if (child_errno[i]) - continue; - pending[i][midx] = hton32 (1); - if (idx == -1) - continue; - pending[i][idx] = hton32 (1); - } -} - int afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, xlator_t *this) @@ -1124,17 +1041,19 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, impunge_sh = &impunge_local->self_heal; active_src = impunge_sh->active_source; - afr_sh_prepare_new_entry_pending_matrix (impunge_local->pending, - impunge_sh->child_errno, - &impunge_sh->entrybuf, - priv->child_count); + afr_prepare_new_entry_pending_matrix (impunge_local->pending, + afr_is_errno_unset, + impunge_sh->child_errno, + &impunge_sh->entrybuf, + priv->child_count); xattr = dict_new (); if (!xattr) { op_errno = ENOMEM; goto out; } - afr_set_pending_dict (priv, xattr, impunge_local->pending); + afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src, + LOCAL_LAST); STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk, (void *) (long) active_src, @@ -1205,6 +1124,18 @@ afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie, struct iatt *postparent, dict_t *xdata) { int call_count = 0; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + + if (IA_IFLNK == impunge_sh->entrybuf.ia_type) { + //For symlinks impunge is attempted un-conditionally + //So the file can already exist. + if ((op_ret < 0) && (op_errno == EEXIST)) + op_ret = 0; + } call_count = afr_frame_return (impunge_frame); if (call_count == 0) @@ -1332,6 +1263,35 @@ afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this, gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", impunge_local->loc.path); + /* + * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY : + * + * Problem: + * While a brick is down in a replica pair, lets say the user creates + * one file(file-A) and a hard link to that file(h-file-A). After the + * brick comes back up, entry self-heal is attempted on parent dir of + * these two files. As part of readdir in self-heal it reads both the + * entries file-A and h-file-A for both of them it does name less lookup + * to check if there are any hardlinks already present in the + * destination brick. It finds that there are no hard links already + * present for files file-A, h-file-A. Self-heal does mknods for both + * file-A and h-file-A. This leads to file-A and h-file-A not being + * hardlinks anymore. + * + * Fix: (More like shrinking of race-window, the race itself is still + * present in posix-mknod). + * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then + * posix_mknod checks if there are already any gfid-links and does + * link() instead of mknod. There still can be a race where two + * posix_mknods same gfid see that + * gfid-link file is not present and proceeds with mknods and result in + * two different files with same gfid. + */ + ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); + if (ret) + gf_log (this->name, GF_LOG_INFO, "%s: %s set failed", + impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY); + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, (void *) (long) child_index, priv->children[child_index], @@ -1408,7 +1368,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, priv = this->private; impunge_local = impunge_frame->local; - buf = &impunge_local->cont.symlink.buf; + buf = &impunge_local->cont.dir_fop.buf; dict = dict_new (); if (!dict) { @@ -1668,7 +1628,7 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this, impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; active_src = impunge_sh->active_source; - impunge_local->cont.symlink.buf = *stbuf; + impunge_local->cont.dir_fop.buf = *stbuf; STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk, (void *) (long) child_index, @@ -1948,14 +1908,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, active_src = sh->active_source; sh->impunge_done = afr_sh_entry_impunge_entry_done; - if ((strcmp (entry->d_name, ".") == 0) - || (strcmp (entry->d_name, "..") == 0) - || ((strcmp (local->loc.path, "/") == 0) - && (strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR) == 0))) { - - gf_log (this->name, GF_LOG_TRACE, - "skipping inspection of %s under %s", - entry->d_name, local->loc.path); + if (can_skip_entry_self_heal (entry->d_name, &local->loc)) { op_ret = 0; goto out; } @@ -2024,7 +1977,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie, local->loc.path, priv->children[active_src]->name, strerror (op_errno)); - sh->op_failed = 1; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_TRACE, "readdir of %s on subvolume %s complete", @@ -2097,7 +2050,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this) active_src = next_active_source (frame, this, sh->active_source); sh->active_source = active_src; - if (sh->op_failed) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2146,7 +2099,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } } UNLOCK (&frame->lock); @@ -2154,7 +2107,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2192,7 +2145,7 @@ afr_sh_entry_open (call_frame_t *frame, xlator_t *this) source = local->self_heal.source; sources = local->self_heal.sources; - sh->block_size = 65536; //131072 + sh->block_size = priv->sh_readdir_size; sh->offset = 0; call_count = sh->active_sinks; @@ -2286,6 +2239,8 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this) "merging all entries as a conservative decision", local->loc.path); + sh->actual_sh_started = _gf_true; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN); afr_sh_entry_open (frame, this); return 0; @@ -2308,7 +2263,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, priv = this->private; if (op_ret < 0) { - sh->op_failed = 1; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_entry_finish (frame, this); goto out; @@ -2371,7 +2326,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks " "failed for %s.", local->loc.path); - sh->op_failed = 1; + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_entry_done (frame, this); } else { @@ -2390,14 +2345,18 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) int afr_self_heal_entry (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; afr_private_t *priv = NULL; - + afr_self_heal_t *sh = NULL; priv = this->private; local = frame->local; + sh = &local->self_heal; + + sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY; if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) { + afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_sh_entrylk (frame, this, &local->loc, NULL, afr_sh_post_nonblocking_entry_cbk); } else { |
