From a1a0e929534cb56e3a938229c7f87bfd0f1c17ec Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Thu, 24 Mar 2011 07:28:00 +0000 Subject: cluster/afr: log enhancements - part 1 Signed-off-by: Amar Tumballi Signed-off-by: Vijay Bellur BUG: 2346 () URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2346 --- xlators/cluster/afr/src/afr-dir-read.c | 338 +++++++++++---------- xlators/cluster/afr/src/afr-inode-read.c | 222 +++++++------- xlators/cluster/afr/src/afr-open.c | 145 ++++----- xlators/cluster/afr/src/afr-self-heal-algorithm.c | 346 +++++++++++----------- xlators/cluster/afr/src/afr-self-heal-data.c | 77 ++--- xlators/cluster/afr/src/afr-self-heal-metadata.c | 21 +- xlators/cluster/afr/src/afr.h | 4 +- 7 files changed, 568 insertions(+), 585 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 59bd7872e..b2a001a19 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -70,10 +70,9 @@ gf_boolean_t __checksums_differ (uint32_t *checksum, int child_count, unsigned char *child_up) { - int ret = _gf_false; - int i = 0; - - uint32_t cksum; + int ret = _gf_false; + int i = 0; + uint32_t cksum = 0; cksum = checksum[0]; @@ -98,20 +97,16 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - - int child_index = 0; - - uint32_t entry_cksum; - - int call_count = 0; - off_t last_offset = 0; - char sh_type_str[256] = {0,}; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t * sh = NULL; + gf_dirent_t * entry = NULL; + gf_dirent_t * tmp = NULL; + int child_index = 0; + uint32_t entry_cksum = 0; + int call_count = 0; + off_t last_offset = 0; + char sh_type_str[256] = {0,}; priv = this->private; local = frame->local; @@ -120,13 +115,20 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, child_index = (long) cookie; if (op_ret == -1) { + gf_log (this->name, GF_LOG_INFO, + "%s: failed to do opendir on %s", + local->loc.path, priv->children[child_index]->name); local->op_ret = -1; local->op_ret = op_errno; goto out; } - if (op_ret == 0) + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: no entries found in %s", + local->loc.path, priv->children[child_index]->name); goto out; + } list_for_each_entry_safe (entry, tmp, &entries->list, list) { entry_cksum = gf_rsync_weak_checksum (entry->d_name, @@ -146,37 +148,37 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, priv->children[child_index]->fops->readdir, local->fd, 131072, last_offset); + return 0; + out: - if ((op_ret == 0) || (op_ret == -1)) { - call_count = afr_frame_return (frame); - - if (call_count == 0) { - if (__checksums_differ (local->cont.opendir.checksum, - priv->child_count, - local->child_up)) { - - sh->need_entry_self_heal = _gf_true; - sh->forced_merge = _gf_true; - sh->type = local->fd->inode->ia_type; - sh->background = _gf_false; - sh->unwind = afr_examine_dir_sh_unwind; - - afr_self_heal_type_str_get(&local->self_heal, - sh_type_str, - sizeof(sh_type_str)); - gf_log (this->name, GF_LOG_NORMAL, - "%s self-heal triggered. path: %s, " - "reason: checksums of directory differ," - " forced merge option set", - sh_type_str, local->loc.path); - - afr_self_heal (frame, this); - } else { - afr_set_opendir_done (this, local->fd->inode); - - AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); - } + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (__checksums_differ (local->cont.opendir.checksum, + priv->child_count, + local->child_up)) { + + sh->need_entry_self_heal = _gf_true; + sh->forced_merge = _gf_true; + sh->type = local->fd->inode->ia_type; + sh->background = _gf_false; + sh->unwind = afr_examine_dir_sh_unwind; + + afr_self_heal_type_str_get(&local->self_heal, + sh_type_str, + sizeof(sh_type_str)); + gf_log (this->name, GF_LOG_INFO, + "%s self-heal triggered. path: %s, " + "reason: checksums of directory differ," + " forced merge option set", + sh_type_str, local->loc.path); + + afr_self_heal (frame, this); + } else { + afr_set_opendir_done (this, local->fd->inode); + + AFR_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd); } } @@ -187,11 +189,10 @@ out: int afr_examine_dir (call_frame_t *frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - - int i; - int call_count = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + int i = 0; + int call_count = 0; local = frame->local; priv = this->private; @@ -229,9 +230,8 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie, afr_private_t *priv = NULL; afr_local_t *local = NULL; int32_t up_children_count = 0; - int ret = -1; - - int call_count = -1; + int ret = -1; + int call_count = -1; priv = this->private; local = frame->local; @@ -251,48 +251,50 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie, call_count = afr_frame_return (frame); if (call_count == 0) { - if (local->op_ret == 0) { - - ret = afr_fd_ctx_set (this, local->fd); + if (local->op_ret != 0) + goto out; - if (ret) { - local->op_ret = -1; - local->op_errno = -1; - gf_log (this->name, GF_LOG_ERROR, " failed to " - "set fd ctx for fd %p", local->fd); - goto out; - } - if (!afr_is_opendir_done (this, local->fd->inode) && - up_children_count > 1) { - - /* - * This is the first opendir on this inode. We need - * to check if the directory's entries are the same - * on all subvolumes. This is needed in addition - * to regular entry self-heal because the readdir - * call is sent only to the first subvolume, and - * thus files that exist only there will never be healed - * otherwise (assuming changelog shows no anamolies). - */ + ret = afr_fd_ctx_set (this, local->fd); + if (ret) { + local->op_ret = -1; + local->op_errno = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to set fd ctx for fd %p", + local->fd); + goto out; + } + if (!afr_is_opendir_done (this, local->fd->inode) && + up_children_count > 1) { + + /* + * This is the first opendir on this inode. We need + * to check if the directory's entries are the same + * on all subvolumes. This is needed in addition + * to regular entry self-heal because the readdir + * call is sent only to the first subvolume, and + * thus files that exist only there will never be healed + * otherwise (assuming changelog shows no anamolies). + */ - gf_log (this->name, GF_LOG_TRACE, - "reading contents of directory %s looking for mismatch", - local->loc.path); + gf_log (this->name, GF_LOG_TRACE, + "reading contents of directory %s looking for mismatch", + local->loc.path); - afr_examine_dir (frame, this); + afr_examine_dir (frame, this); - } else { - AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); - } } else { - out: - AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); + /* do the unwind */ + goto out; } } return 0; + +out: + AFR_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd); + + return 0; } @@ -302,15 +304,12 @@ afr_opendir (call_frame_t *frame, xlator_t *this, { afr_private_t * priv = NULL; afr_local_t * local = NULL; - int child_count = 0; int i = 0; - - int ret = -1; - int call_count = -1; - - int32_t op_ret = -1; - int32_t op_errno = 0; + int ret = -1; + int call_count = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -376,8 +375,8 @@ struct entry_name { static gf_boolean_t remembered_name (const char *name, struct list_head *entries) { - struct entry_name *e; - gf_boolean_t ret = _gf_false; + struct entry_name *e = NULL; + gf_boolean_t ret = _gf_false; list_for_each_entry (e, entries, list) { if (!strcmp (name, e->name)) { @@ -394,17 +393,15 @@ out: static void afr_remember_entries (gf_dirent_t *entries, fd_t *fd) { - struct entry_name *n = NULL; - gf_dirent_t * entry = NULL; - - int ret = 0; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; + struct entry_name *n = NULL; + gf_dirent_t *entry = NULL; + int ret = 0; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; ret = fd_ctx_get (fd, THIS, &ctx); if (ret < 0) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (THIS->name, GF_LOG_INFO, "could not get fd ctx for fd=%p", fd); return; } @@ -424,17 +421,16 @@ afr_remember_entries (gf_dirent_t *entries, fd_t *fd) static off_t afr_filter_entries (gf_dirent_t *entries, fd_t *fd) { - gf_dirent_t *entry, *tmp; - int ret = 0; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - off_t offset = 0; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + int ret = 0; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + off_t offset = 0; ret = fd_ctx_get (fd, THIS, &ctx); if (ret < 0) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (THIS->name, GF_LOG_INFO, "could not get fd ctx for fd=%p", fd); return -1; } @@ -457,15 +453,15 @@ afr_filter_entries (gf_dirent_t *entries, fd_t *fd) static void afr_forget_entries (fd_t *fd) { - struct entry_name *entry, *tmp; - int ret = 0; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; + struct entry_name *entry = NULL; + struct entry_name *tmp = NULL; + int ret = 0; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; ret = fd_ctx_get (fd, THIS, &ctx); if (ret < 0) { - gf_log (THIS->name, GF_LOG_DEBUG, + gf_log (THIS->name, GF_LOG_INFO, "could not get fd ctx for fd=%p", fd); return; } @@ -485,32 +481,32 @@ afr_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - - int child_index = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + gf_dirent_t * entry = NULL; + gf_dirent_t * tmp = NULL; + int child_index = -1; priv = this->private; local = frame->local; child_index = (long) cookie; - if (op_ret != -1) { - list_for_each_entry_safe (entry, tmp, &entries->list, list) { - entry->d_ino = afr_itransform (entry->d_ino, - priv->child_count, - child_index); + if (op_ret == -1) + goto out; - if ((local->fd->inode == local->fd->inode->table->root) - && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { - list_del_init (&entry->list); - GF_FREE (entry); - } + list_for_each_entry_safe (entry, tmp, &entries->list, list) { + entry->d_ino = afr_itransform (entry->d_ino, + priv->child_count, + child_index); + + if ((local->fd->inode == local->fd->inode->table->root) + && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { + list_del_init (&entry->list); + GF_FREE (entry); } } +out: AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries); return 0; @@ -521,23 +517,18 @@ int32_t afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - ino_t inum = 0; - - int call_child = 0; - int ret = 0; - - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - - int child_index = -1; - - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - off_t offset = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + ino_t inum = 0; + int call_child = 0; + int ret = 0; + gf_dirent_t * entry = NULL; + gf_dirent_t * tmp = NULL; + int child_index = -1; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + off_t offset = 0; priv = this->private; children = priv->children; @@ -549,7 +540,7 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (priv->strict_readdir) { ret = fd_ctx_get (local->fd, this, &ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "could not get fd ctx for fd=%p", local->fd); op_ret = -1; op_errno = -ret; @@ -560,6 +551,8 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (child_went_down (op_ret, op_errno)) { if (all_tried (child_index, priv->child_count)) { + gf_log (this->name, GF_LOG_INFO, + "all options tried going out"); goto out; } @@ -600,6 +593,8 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (priv->strict_readdir) { if (fd_ctx->failed_over) { if (list_empty (&entries->list)) { + gf_log (this->name, GF_LOG_DEBUG, + "no entries found"); goto out; } @@ -614,7 +609,8 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, try to get more entries */ gf_log (this->name, GF_LOG_TRACE, - "trying to fetch non-duplicate entries from offset %"PRId64", child %s", + "trying to fetch non-duplicate entries " + "from offset %"PRId64", child %s", offset, children[child_index]->name); STACK_WIND_COOKIE (frame, afr_readdirp_cbk, @@ -640,18 +636,15 @@ int32_t afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, int whichop) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int ret = -1; - - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -672,7 +665,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, call_child = afr_first_up_child (priv); if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "no child is up"); goto out; } @@ -683,7 +676,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, if (priv->strict_readdir) { ret = fd_ctx_get (fd, this, &ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "could not get fd ctx for fd=%p", fd); op_errno = -ret; goto out; @@ -693,7 +686,8 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, if (fd_ctx->last_tried != call_child) { gf_log (this->name, GF_LOG_TRACE, - "first up child has changed from %d to %d, restarting readdir from offset 0", + "first up child has changed from %d to %d, " + "restarting readdir from offset 0", fd_ctx->last_tried, call_child); fd_ctx->failed_over = _gf_true; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 11db1e1d7..dd832ffe7 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -63,14 +63,13 @@ int32_t afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; priv = this->private; children = priv->children; @@ -84,6 +83,9 @@ afr_access_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.access.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: all subvolumes tried, going out", + local->loc.path); goto out; } this_try = ++local->cont.access.last_tried; @@ -111,19 +113,15 @@ out: int32_t -afr_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) +afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - - int32_t read_child = -1; - - - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + int32_t read_child = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -147,8 +145,8 @@ afr_access (call_frame_t *frame, xlator_t *this, call_child = afr_first_up_child (priv); if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + gf_log (this->name, GF_LOG_INFO, + "%s: no child is up", loc->path); goto out; } @@ -156,7 +154,7 @@ afr_access (call_frame_t *frame, xlator_t *this, } loc_copy (&local->loc, loc); - local->cont.access.mask = mask; + local->cont.access.mask = mask; STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) call_child, @@ -181,14 +179,13 @@ afr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; priv = this->private; children = priv->children; @@ -202,6 +199,9 @@ afr_stat_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.stat.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: all subvolumes tried, going out", + local->loc.path); goto out; } this_try = ++local->cont.stat.last_tried; @@ -232,16 +232,13 @@ out: int32_t -afr_stat (call_frame_t *frame, xlator_t *this, - loc_t *loc) +afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) { afr_private_t * priv = NULL; afr_local_t * local = NULL; xlator_t ** children = NULL; - int32_t read_child = -1; int call_child = 0; - int32_t op_ret = -1; int32_t op_errno = 0; @@ -269,8 +266,8 @@ afr_stat (call_frame_t *frame, xlator_t *this, call_child = afr_first_up_child (priv); if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + gf_log (this->name, GF_LOG_INFO, + "%s: no child is up", loc->path); goto out; } @@ -301,18 +298,16 @@ out: /* {{{ fstat */ int32_t -afr_fstat_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *buf) +afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; priv = this->private; children = priv->children; @@ -326,6 +321,9 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.fstat.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%p: all subvolumes tried, going out", + local->fd); goto out; } this_try = ++local->cont.fstat.last_tried; @@ -362,10 +360,8 @@ afr_fstat (call_frame_t *frame, xlator_t *this, afr_private_t * priv = NULL; afr_local_t * local = NULL; xlator_t ** children = NULL; - int call_child = 0; int32_t read_child = -1; - int32_t op_ret = -1; int32_t op_errno = 0; @@ -396,8 +392,8 @@ afr_fstat (call_frame_t *frame, xlator_t *this, if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + gf_log (this->name, GF_LOG_INFO, + "%p: no child is up", fd); goto out; } @@ -430,14 +426,13 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, const char *buf, struct iatt *sbuf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; priv = this->private; children = priv->children; @@ -451,6 +446,9 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.readlink.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: all subvolumes tried, going out", + local->loc.path); goto out; } this_try = ++local->cont.readlink.last_tried; @@ -484,15 +482,13 @@ int32_t afr_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - - int32_t read_child = -1; - - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + int32_t read_child = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -519,8 +515,8 @@ afr_readlink (call_frame_t *frame, xlator_t *this, if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + gf_log (this->name, GF_LOG_INFO, + "%s: no child is up", loc->path); goto out; } @@ -581,10 +577,9 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value, void __filter_xattrs (dict_t *dict) { - struct list_head keys; - - struct _xattr_key *key; - struct _xattr_key *tmp; + struct list_head keys = {0,}; + struct _xattr_key *key = NULL; + struct _xattr_key *tmp = NULL; INIT_LIST_HEAD (&keys); @@ -607,14 +602,13 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; priv = this->private; children = priv->children; @@ -628,6 +622,9 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.getxattr.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: all subvolumes tried, going out", + local->loc.path); goto out; } this_try = ++local->cont.getxattr.last_tried; @@ -669,18 +666,16 @@ int32_t afr_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t * local = NULL; - xlator_list_t * trav = NULL; - xlator_t ** sub_volumes= NULL; - - int read_child = -1; - int i = 0; - - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t * local = NULL; + xlator_list_t * trav = NULL; + xlator_t ** sub_volumes = NULL; + int read_child = -1; + int i = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -703,7 +698,9 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, if (name) { if (!strncmp (name, AFR_XATTR_PREFIX, strlen (AFR_XATTR_PREFIX))) { - + gf_log (this->name, GF_LOG_INFO, + "%s: no data present for key %s", + loc->path, name); op_errno = ENODATA; goto out; } @@ -726,6 +723,9 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, MARKER_UUID_TYPE, priv->vol_uuid)) { + gf_log (this->name, GF_LOG_INFO, + "%s: failed to get marker attr (%s)", + loc->path, name); op_errno = EINVAL; goto out; } @@ -754,6 +754,9 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, priv->child_count, MARKER_XTIME_TYPE, priv->vol_uuid)) { + gf_log (this->name, GF_LOG_INFO, + "%s: failed to get marker attr (%s)", + loc->path, name); op_errno = EINVAL; goto out; } @@ -761,7 +764,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, return 0; } } - } read_child = afr_read_child (this, loc->inode); @@ -775,8 +777,8 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, if (call_child == -1) { op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + gf_log (this->name, GF_LOG_INFO, + "%s: no child is up", loc->path); goto out; } @@ -820,14 +822,13 @@ afr_readv_cbk (call_frame_t *frame, void *cookie, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -847,6 +848,9 @@ afr_readv_cbk (call_frame_t *frame, void *cookie, last_tried = local->cont.readv.last_tried; if (all_tried (last_tried, priv->child_count)) { + gf_log (this->name, GF_LOG_DEBUG, + "%p: all subvolumes tried, going out", + local->fd); goto out; } this_try = ++local->cont.readv.last_tried; @@ -889,10 +893,8 @@ afr_readv (call_frame_t *frame, xlator_t *this, afr_private_t * priv = NULL; afr_local_t * local = NULL; xlator_t ** children = NULL; - int32_t read_child = -1; int call_child = 0; - int32_t op_ret = -1; int32_t op_errno = 0; @@ -924,7 +926,7 @@ afr_readv (call_frame_t *frame, xlator_t *this, if (call_child == -1) { op_errno = ENOTCONN; gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); + "%p: no child is up", fd); goto out; } diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 5b96c4e34..81c591651 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -74,16 +74,12 @@ afr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd) { - afr_local_t * local = NULL; - - int child_index = (long) cookie; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int ret = 0; - - int call_count = -1; + afr_local_t * local = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int ret = 0; + int call_count = -1; + int child_index = (long) cookie; local = frame->local; @@ -101,8 +97,7 @@ afr_open_cbk (call_frame_t *frame, void *cookie, if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "could not set fd ctx for fd=%p", - fd); + "could not set fd ctx for fd=%p", fd); local->op_ret = -1; local->op_errno = -ret; @@ -151,16 +146,14 @@ int afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, fd_t *fd, int32_t wbflags) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - - int i = 0; - int ret = -1; - - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t wind_flags = flags & (~O_TRUNC); + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + int i = 0; + int ret = -1; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t wind_flags = flags & (~O_TRUNC); VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -171,6 +164,8 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (afr_is_split_brain (this, loc->inode)) { /* self-heal failed */ + gf_log (this->name, GF_LOG_WARNING, + "failed to open as split brain seen, returning EIO"); op_errno = EIO; goto out; } @@ -219,17 +214,14 @@ int afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - int ret = 0; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + uint64_t ctx = 0; + int ret = 0; + int call_count = 0; + int child_index = (long) cookie; priv = this->private; local = frame->local; @@ -241,6 +233,8 @@ afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = fd_ctx_get (fd, this, &ctx); if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get fd context, %p", fd); goto out; } @@ -270,7 +264,7 @@ out: static int __unopened_count (int child_count, unsigned int *opened_on, unsigned char *child_up) { - int i; + int i = 0; int count = 0; for (i = 0; i < child_count; i++) { @@ -285,16 +279,14 @@ __unopened_count (int child_count, unsigned int *opened_on, unsigned char *child int afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int abandon = 0; - int ret = 0; - int i; - int call_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int abandon = 0; + int ret = 0; + int i = 0; + int call_count = 0; priv = this->private; local = frame->local; @@ -306,8 +298,10 @@ afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) */ ret = fd_ctx_get (local->fd, this, &ctx); - if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get fd context %p (%s)", + local->fd, local->loc.path); abandon = 1; goto out; } @@ -327,6 +321,9 @@ afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) UNLOCK (&local->fd->lock); if (call_count == 0) { + gf_log (this->name, GF_LOG_WARNING, + "fd not open on any subvolume %p (%s)", + local->fd, local->loc.path); abandon = 1; goto out; } @@ -460,8 +457,10 @@ afr_openfd_flush_done (call_frame_t *frame, xlator_t *this) LOCK (&local->fd->lock); { _ret = __fd_ctx_get (local->fd, this, &ctx); - if (_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get fd context %p (%s)", + local->fd, local->loc.path); goto out; } @@ -499,15 +498,14 @@ afr_openfd_xaction (call_frame_t *frame, xlator_t *this, fd_t *fd) local->op = GF_FOP_FLUSH; - local->transaction.fop = afr_openfd_sh; - local->transaction.done = afr_openfd_flush_done; + local->transaction.fop = afr_openfd_sh; + local->transaction.done = afr_openfd_flush_done; local->transaction.start = 0; local->transaction.len = 0; gf_log (this->name, GF_LOG_TRACE, - "doing up/down flush on fd=%p", - fd); + "doing up/down flush on fd=%p", fd); afr_transaction (frame, this, AFR_DATA_TRANSACTION); @@ -521,17 +519,14 @@ int afr_openfd_xaction_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - int ret = 0; - - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = 0; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int call_count = 0; + int child_index = (long) cookie; priv = this->private; local = frame->local; @@ -543,6 +538,9 @@ afr_openfd_xaction_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = fd_ctx_get (fd, this, &ctx); if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get fd context %p (%s)", + fd, local->loc.path); goto out; } @@ -571,16 +569,14 @@ out: int afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int no_open = 0; - int ret = 0; - int i; - int call_count = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int no_open = 0; + int ret = 0; + int i = 0; + int call_count = 0; priv = this->private; local = frame->local; @@ -604,8 +600,10 @@ afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) local->fd = fd_ref (fd); ret = fd_ctx_get (fd, this, &ctx); - if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get fd context %p (%s)", + fd, local->loc.path); no_open = 1; goto out; } @@ -621,6 +619,9 @@ afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) UNLOCK (&local->fd->lock); if (call_count == 0) { + gf_log (this->name, GF_LOG_WARNING, + "fd not open on any subvolume %p (%s)", + fd, local->loc.path); no_open = 1; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 4bb70915e..b28889fbd 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -86,7 +86,7 @@ sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this) sh_full_private_cleanup (frame, this); if (sh->op_failed) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_INFO, "full self-heal aborting on %s", local->loc.path); @@ -104,13 +104,12 @@ sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this) static int sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset) { - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_full_private_t *sh_priv = NULL; rw_local = rw_frame->local; rw_sh = &rw_local->self_heal; @@ -133,16 +132,14 @@ sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - int child_index = (long) cookie; - int call_count = 0; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t *rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + int child_index = (long) cookie; + int call_count = 0; priv = this->private; @@ -161,7 +158,7 @@ sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, LOCK (&sh_frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "write to %s failed on subvolume %s (%s)", sh_local->loc.path, priv->children[child_index]->name, @@ -188,18 +185,15 @@ sh_full_read_cbk (call_frame_t *rw_frame, void *cookie, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - int i = 0; - int call_count = 0; - - off_t offset = (long) cookie; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t *rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + int call_count = 0; + off_t offset = (long) cookie; priv = this->private; rw_local = rw_frame->local; @@ -218,6 +212,11 @@ sh_full_read_cbk (call_frame_t *rw_frame, void *cookie, op_ret, sh_local->loc.path, offset); if (op_ret <= 0) { + gf_log (this->name, GF_LOG_INFO, + "read from %s failed on subvolume %s (%s)", + sh_local->loc.path, + priv->children[sh->source]->name, + strerror (op_errno)); sh->op_failed = 1; sh_full_loop_return (rw_frame, this, offset); return 0; @@ -231,7 +230,8 @@ sh_full_read_cbk (call_frame_t *rw_frame, void *cookie, sh->offset already being updated above */ - + gf_log (this->name, GF_LOG_DEBUG, + "block has all 0 filled"); sh_full_loop_return (rw_frame, this, offset); goto out; } @@ -262,15 +262,13 @@ out: static int sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - afr_self_heal_t *sh = NULL; - - call_frame_t *rw_frame = NULL; - - int32_t op_errno = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t *rw_sh = NULL; + afr_self_heal_t *sh = NULL; + call_frame_t *rw_frame = NULL; + int32_t op_errno = 0; priv = this->private; local = frame->local; @@ -308,15 +306,14 @@ out: static int sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; - gf_boolean_t is_driver_done = _gf_false; - blksize_t block_size = 0; - off_t offset = 0; - - int loop = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_full_private_t *sh_priv = NULL; + gf_boolean_t is_driver_done = _gf_false; + blksize_t block_size = 0; + off_t offset = 0; + int loop = 0; priv = this->private; local = frame->local; @@ -381,6 +378,8 @@ afr_sh_algo_full (call_frame_t *frame, xlator_t *this) sh_priv = GF_CALLOC (1, sizeof (*sh_priv), gf_afr_mt_afr_private_t); + if (!sh_priv) + goto out; LOCK_INIT (&sh_priv->lock); @@ -389,6 +388,7 @@ afr_sh_algo_full (call_frame_t *frame, xlator_t *this) local->call_count = 0; sh_full_loop_driver (frame, this, _gf_true); +out: return 0; } @@ -406,8 +406,7 @@ sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this) afr_local_t * local = NULL; afr_self_heal_t * sh = NULL; afr_sh_algo_diff_private_t *sh_priv = NULL; - - int i; + int i = 0; priv = this->private; local = frame->local; @@ -441,7 +440,7 @@ sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this) static uint32_t __make_cookie (int loop_index, int child_index) { - uint32_t ret = (loop_index << 16) | child_index; + uint32_t ret = ((loop_index << 16) | child_index); return ret; } @@ -449,7 +448,7 @@ __make_cookie (int loop_index, int child_index) static int __loop_index (uint32_t cookie) { - return (cookie & 0xFFFF0000) >> 16; + return ((cookie & 0xFFFF0000) >> 16); } @@ -478,7 +477,7 @@ static int sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count) { int writes = 0; - int i; + int i = 0; for (i = 0; i < child_count; i++) { if (write_needed[i]) @@ -492,13 +491,12 @@ sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count) static int sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - int32_t total_blocks = 0; - int32_t diff_blocks = 0; - + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t * sh = NULL; + afr_sh_algo_diff_private_t *sh_priv = NULL; + int32_t total_blocks = 0; + int32_t diff_blocks = 0; priv = this->private; local = frame->local; @@ -509,19 +507,15 @@ sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this) sh_diff_private_cleanup (frame, this); if (sh->op_failed) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_INFO, "diff self-heal aborting on %s", local->loc.path); local->self_heal.algo_abort_cbk (frame, this); } else { - gf_log (this->name, GF_LOG_TRACE, - "diff self-heal completed on %s", - local->loc.path); - - - gf_log (this->name, GF_LOG_NORMAL, - "diff self-heal on %s: %d blocks of %d were different (%.2f%%)", + gf_log (this->name, GF_LOG_INFO, + "diff self-heal on %s: completed. " + "(%d blocks of %d were different (%.2f%%))", local->loc.path, diff_blocks, total_blocks, ((diff_blocks * 1.0)/total_blocks) * 100); @@ -540,14 +534,13 @@ static int sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this, struct sh_diff_loop_state *loop_state) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_diff_private_t *sh_priv = NULL; priv = this->private; @@ -575,20 +568,17 @@ sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *postbuf) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - afr_sh_algo_diff_private_t *sh_priv; - struct sh_diff_loop_state *loop_state; - - int call_count = 0; - int child_index = 0; - int loop_index = 0; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_diff_private_t *sh_priv = NULL; + struct sh_diff_loop_state *loop_state = NULL; + int call_count = 0; + int child_index = 0; + int loop_index = 0; priv = this->private; rw_local = rw_frame->local; @@ -611,7 +601,7 @@ sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, LOCK (&sh_frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "write to %s failed on subvolume %s (%s)", sh_local->loc.path, priv->children[child_index]->name, @@ -638,23 +628,18 @@ sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - - afr_sh_algo_diff_private_t * sh_priv = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - int loop_index; - struct sh_diff_loop_state *loop_state; - - uint32_t wcookie; - - int i = 0; - int call_count = 0; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * rw_sh = NULL; + afr_sh_algo_diff_private_t * sh_priv = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + int loop_index = 0; + struct sh_diff_loop_state *loop_state = NULL; + uint32_t wcookie = 0; + int i = 0; + int call_count = 0; priv = this->private; rw_local = rw_frame->local; @@ -686,7 +671,7 @@ sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie, if (sh->file_has_holes) { if (iov_0filled (vector, count) == 0) { - + gf_log (this->name, GF_LOG_DEBUG, "0 filled block"); sh_diff_loop_return (rw_frame, this, loop_state); goto out; } @@ -717,18 +702,15 @@ static int sh_diff_read (call_frame_t *rw_frame, xlator_t *this, int loop_index) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - - afr_sh_algo_diff_private_t * sh_priv = NULL; - struct sh_diff_loop_state *loop_state; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - uint32_t cookie; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * rw_sh = NULL; + afr_sh_algo_diff_private_t * sh_priv = NULL; + struct sh_diff_loop_state *loop_state = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + uint32_t cookie = 0; priv = this->private; rw_local = rw_frame->local; @@ -759,23 +741,19 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, uint8_t *strong_checksum) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - - afr_sh_algo_diff_private_t * sh_priv = NULL; - - int loop_index = 0; - int child_index = 0; - struct sh_diff_loop_state *loop_state; - - int call_count = 0; - int i = 0; - int write_needed = 0; + afr_private_t * priv = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t *rw_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t * sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_diff_private_t * sh_priv = NULL; + int loop_index = 0; + int child_index = 0; + struct sh_diff_loop_state *loop_state = NULL; + int call_count = 0; + int i = 0; + int write_needed = 0; priv = this->private; @@ -821,7 +799,7 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, must be written to this sink */ - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "checksum on subvolume %s at offset %" PRId64" differs from that on source", priv->children[i]->name, loop_state->offset); @@ -854,7 +832,7 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, static int sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max) { - int i; + int i = 0; LOCK (&sh_priv->lock); { @@ -880,24 +858,19 @@ sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max) static int sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * sh = NULL; - afr_self_heal_t * rw_sh = NULL; - - afr_sh_algo_diff_private_t * sh_priv = NULL; - - call_frame_t *rw_frame = NULL; - - uint32_t cookie; - int loop_index = 0; - struct sh_diff_loop_state *loop_state = NULL; - - int32_t op_errno = 0; - - int call_count = 0; - int i = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_local_t * rw_local = NULL; + afr_self_heal_t * sh = NULL; + afr_self_heal_t * rw_sh = NULL; + afr_sh_algo_diff_private_t * sh_priv = NULL; + call_frame_t *rw_frame = NULL; + uint32_t cookie = 0; + int loop_index = 0; + struct sh_diff_loop_state *loop_state = NULL; + int32_t op_errno = 0; + int call_count = 0; + int i = 0; priv = this->private; local = frame->local; @@ -971,17 +944,15 @@ sh_diff_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call, struct sh_diff_loop_state *loop_state) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - gf_boolean_t is_driver_done = _gf_false; - blksize_t block_size = 0; - - int loop = 0; - - off_t offset = 0; - char sh_type_str[256] = {0,}; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t * sh = NULL; + afr_sh_algo_diff_private_t *sh_priv = NULL; + gf_boolean_t is_driver_done = _gf_false; + blksize_t block_size = 0; + int loop = 0; + off_t offset = 0; + char sh_type_str[256] = {0,}; priv = this->private; local = frame->local; @@ -1044,8 +1015,7 @@ afr_sh_algo_diff (call_frame_t *frame, xlator_t *this) afr_local_t * local = NULL; afr_self_heal_t * sh = NULL; afr_sh_algo_diff_private_t *sh_priv = NULL; - - int i; + int i = 0; priv = this->private; local = frame->local; @@ -1053,6 +1023,8 @@ afr_sh_algo_diff (call_frame_t *frame, xlator_t *this) sh_priv = GF_CALLOC (1, sizeof (*sh_priv), gf_afr_mt_afr_private_t); + if (!sh_priv) + goto err; sh_priv->block_size = this->ctx->page_size; @@ -1065,20 +1037,48 @@ afr_sh_algo_diff (call_frame_t *frame, xlator_t *this) sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size, sizeof (*sh_priv->loops), gf_afr_mt_sh_diff_loop_state); + if (!sh_priv->loops) + goto err; for (i = 0; i < priv->data_self_heal_window_size; i++) { sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]), gf_afr_mt_sh_diff_loop_state); + if (!sh_priv->loops[i]) + goto err; sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN, gf_afr_mt_uint8_t); + if (!sh_priv->loops[i]->checksum) + goto err; + sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count, sizeof (*sh_priv->loops[i]->write_needed), gf_afr_mt_char); + if (!sh_priv->loops[i]->write_needed) + goto err; + } sh_diff_loop_driver (frame, this, _gf_true, NULL); + return 0; +err: + if (sh_priv) { + if (sh_priv->loops) { + for (i = 0; i < priv->data_self_heal_window_size; i++) { + if (sh_priv->loops[i]->write_needed) + GF_FREE (sh_priv->loops[i]->write_needed); + if (sh_priv->loops[i]->checksum) + GF_FREE (sh_priv->loops[i]->checksum); + if (sh_priv->loops[i]) + GF_FREE (sh_priv->loops[i]); + } + + GF_FREE (sh_priv->loops); + } + + GF_FREE (sh_priv); + } return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index ca7dd92d8..950fcb167 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -72,8 +72,8 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this) sh->healing_fd = NULL; } -/* for (i = 0; i < priv->child_count; i++) */ -/* sh->locked_nodes[i] = 0; */ + /* for (i = 0; i < priv->child_count; i++) */ + /* sh->locked_nodes[i] = 0; */ gf_log (this->name, GF_LOG_TRACE, "self heal of %s completed", @@ -89,11 +89,10 @@ int afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - - int child_index = (long) cookie; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = (long) cookie; local = frame->local; priv = this->private; @@ -101,7 +100,7 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "flush or setattr failed on %s on subvolume %s: %s", local->loc.path, priv->children[child_index]->name, strerror (op_errno)); @@ -121,7 +120,8 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, struct iatt *statpost) + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost) { afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno); @@ -132,22 +132,20 @@ afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int afr_sh_data_close (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_self_heal_t *sh = NULL; - - int i = 0; - int call_count = 0; - int source = 0; - int32_t valid = 0; - - struct iatt stbuf = {0,}; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + int call_count = 0; + int source = 0; + int32_t valid = 0; + struct iatt stbuf = {0,}; local = frame->local; sh = &local->self_heal; priv = this->private; - source = sh->source; + source = sh->source; valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME); @@ -232,13 +230,12 @@ afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int call_count = 0; int child_index = (long) cookie; - local = frame->local; LOCK (&frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "locking inode of %s on child %d failed: %s", local->loc.path, child_index, strerror (op_errno)); @@ -327,7 +324,6 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) int i = 0; dict_t **erase_xattr = NULL; - local = frame->local; sh = &local->self_heal; priv = this->private; @@ -398,7 +394,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (op_ret == -1) - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "ftruncate of %s on subvolume %s failed (%s)", local->loc.path, priv->children[child_index]->name, @@ -511,7 +507,8 @@ afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this) if ((local->enoent_count != 0) || sh_zero_byte_files_exist (sh, priv->child_count) - || (sh->file_size <= (priv->data_self_heal_window_size * this->ctx->page_size))) { + || (sh->file_size <= (priv->data_self_heal_window_size * + this->ctx->page_size))) { /* * If the file does not exist on one of the subvolumes, @@ -546,7 +543,6 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) int active_sinks = 0; int source = 0; int i = 0; - struct afr_sh_algorithm *sh_algo = NULL; local = frame->local; @@ -564,7 +560,7 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) sh->success[source] = 1; if (active_sinks == 0) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_INFO, "no active sinks for performing self-heal on file %s", local->loc.path); afr_sh_data_finish (frame, this); @@ -592,7 +588,6 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_local_t * orig_local = NULL; - afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; int nsources = 0; @@ -628,7 +623,8 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) && (sh->child_errno[priv->favorite_child] == 0)) { gf_log (this->name, GF_LOG_DEBUG, - "Picking favorite child %s as authentic source to resolve conflicting data of %s", + "Picking favorite child %s as authentic source to " + "resolve conflicting data of %s", priv->children[priv->favorite_child]->name, local->loc.path); @@ -640,9 +636,9 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) if (nsources == -1) { gf_log (this->name, GF_LOG_ERROR, - "Unable to self-heal contents of '%s' (possible split-brain). " - "Please delete the file from all but the preferred " - "subvolume.", local->loc.path); + "Unable to self-heal contents of '%s' (possible " + "split-brain). Please delete the file from all but " + "the preferred subvolume.", local->loc.path); local->govinda_gOvinda = 1; @@ -661,7 +657,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) } sh->source = source; - sh->block_size = 65536; + sh->block_size = 65536; /* TODO: make it configurable or use macro */ sh->file_size = sh->buf[source].ia_size; if (FILE_HAS_HOLES (&sh->buf[source])) @@ -706,7 +702,6 @@ afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr) { afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; - int source = 0; int i = 0; @@ -743,7 +738,6 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie, afr_private_t *priv = NULL; afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - int call_count = -1; int child_index = (long) cookie; @@ -780,7 +774,6 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this) afr_self_heal_t *sh = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; - int call_count = 0; int i = 0; @@ -818,7 +811,6 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie, afr_private_t *priv = NULL; afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; - int call_count = -1; int child_index = (long) cookie; @@ -856,9 +848,7 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; afr_private_t *priv = NULL; dict_t *xattr_req = NULL; - - int32_t zero_pending[3] = {0, 0, 0}; - + int32_t zero_pending[3] = {0,}; int call_count = 0; int i = 0; int ret = 0; @@ -917,7 +907,7 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) int_lock = &local->internal_lock; if (int_lock->lock_op_ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "Non Blocking inodelks failed."); afr_sh_data_done (frame, this); } else { @@ -1005,7 +995,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_INFO, "open of %s failed on child %s (%s)", local->loc.path, priv->children[child_index]->name, @@ -1044,9 +1034,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this) { int i = 0; int call_count = 0; - fd_t *fd = NULL; - afr_local_t * local = NULL; afr_private_t * priv = NULL; afr_self_heal_t *sh = NULL; @@ -1095,7 +1083,6 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this) afr_self_heal_t *sh = NULL; afr_private_t *priv = this->private; - local = frame->local; sh = &local->self_heal; diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 3f2e657a4..ee27a7bd1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -76,13 +76,13 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this) } if (local->govinda_gOvinda) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "aborting selfheal of %s", local->loc.path); sh->completion_cbk (frame, this); } else { if (IA_ISREG (sh->type)) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "proceeding to data check on %s", local->loc.path); afr_self_heal_data (frame, this); @@ -90,7 +90,7 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this) } if (IA_ISDIR (sh->type)) { - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "proceeding to entry check on %s", local->loc.path); afr_self_heal_entry (frame, this); @@ -155,11 +155,6 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie, local = frame->local; - LOCK (&frame->lock); - { - } - UNLOCK (&frame->lock); - call_count = afr_frame_return (frame); if (call_count == 0) @@ -190,6 +185,8 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this) erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count, gf_afr_mt_dict_t); + if (!erase_xattr) + return -ENOMEM; for (i = 0; i < priv->child_count; i++) { if (sh->xattr[i]) { @@ -206,7 +203,7 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this) local->call_count = call_count; if (call_count == 0) { - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_INFO, "metadata of %s not healed on any subvolume", local->loc.path); @@ -262,7 +259,7 @@ afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "setting attributes failed for %s on %s (%s)", local->loc.path, priv->children[child_index]->name, @@ -314,7 +311,7 @@ afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr) int call_count = 0; int i = 0; - struct iatt stbuf; + struct iatt stbuf = {0,}; int32_t valid = 0; local = frame->local; @@ -591,7 +588,7 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (xattr) sh->xattr[child_index] = dict_ref (xattr); } else { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "path %s on subvolume %s => -1 (%s)", local->loc.path, priv->children[child_index]->name, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a2da671a7..6e59b15ce 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -834,8 +834,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) local->optimistic_change_log = 1; local->call_count = afr_up_children_count (priv->child_count, local->child_up); - if (local->call_count == 0) + if (local->call_count == 0) { + gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up"); return -ENOTCONN; + } local->transaction.erase_pending = 1; -- cgit