/* Copyright (c) 2010-2011 Gluster, Inc. This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GlusterFS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif #include "afr.h" #include "syncop.h" #include "afr-self-heald.h" #include "afr-self-heal-common.h" #define AFR_POLL_TIMEOUT 600 void afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl); void afr_do_poll_self_heal (void *data) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; struct timeval timeout = {0}; xlator_t *this = NULL; long child = (long)data; int i = 0; this = THIS; priv = this->private; shd = &priv->shd; if (child == AFR_ALL_CHILDREN) { //done by command for (i = 0; i < priv->child_count; i++) afr_start_crawl (this, i, INDEX); goto out; } else { afr_start_crawl (this, child, INDEX); if (shd->pos[child] == AFR_POS_REMOTE) goto out; } timeout.tv_sec = AFR_POLL_TIMEOUT; timeout.tv_usec = 0; if (shd->timer[child]) gf_timer_call_cancel (this->ctx, shd->timer[child]); shd->timer[child] = gf_timer_call_after (this->ctx, timeout, afr_do_poll_self_heal, data); if (shd->timer[child] == NULL) { gf_log (this->name, GF_LOG_WARNING, "Cannot create pending self-heal polling timer for %s", priv->children[child]->name); } out: return; } static int _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, xlator_t *readdir_xl); static int get_pathinfo_host (char *pathinfo, char *hostname, size_t size) { char *start = NULL; char *end = NULL; int ret = -1; int i = 0; if (!pathinfo) goto out; start = strchr (pathinfo, ':'); if (!start) goto out; end = strrchr (pathinfo, ':'); if (start == end) goto out; memset (hostname, 0, size); i = 0; while (++start != end) hostname[i++] = *start; ret = 0; out: return ret; } int afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) { int ret = 0; char pathinfohost[1024] = {0}; char localhost[1024] = {0}; xlator_t *this = THIS; *local = _gf_false; ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", pathinfo); goto out; } ret = gethostname (localhost, sizeof (localhost)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " "reason: %s", strerror (errno)); goto out; } if (!strcmp (localhost, pathinfohost)) *local = _gf_true; out: return ret; } int afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, loc_t *dirloc, xlator_t *readdir_xl) { afr_private_t *priv = NULL; dict_t *xattr = NULL; void *index_gfid = NULL; loc_t rootloc = {0}; struct iatt iatt = {0}; struct iatt parent = {0}; int ret = 0; priv = this->private; if (crawl_data->crawl == FULL) { afr_build_root_loc (this, dirloc); } else { afr_build_root_loc (this, &rootloc); ret = syncop_getxattr (readdir_xl, &rootloc, &xattr, GF_XATTROP_INDEX_GFID); if (ret < 0) goto out; ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to get index " "dir gfid on %s", readdir_xl->name); goto out; } if (!index_gfid) { gf_log (this->name, GF_LOG_ERROR, "index gfid empty " "on %s", readdir_xl->name); ret = -1; goto out; } uuid_copy (dirloc->gfid, index_gfid); dirloc->path = ""; dirloc->inode = inode_new (priv->root_inode->table); ret = syncop_lookup (readdir_xl, dirloc, NULL, &iatt, NULL, &parent); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "lookup failed on " "index dir on %s", readdir_xl->name); goto out; } inode_link (dirloc->inode, NULL, NULL, &iatt); } ret = 0; out: if (xattr) dict_unref (xattr); loc_wipe (&rootloc); return ret; } int afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, loc_t *dirloc, xlator_t *readdir_xl) { fd_t *fd = NULL; int ret = 0; if (crawl_data->crawl == FULL) { fd = fd_create (dirloc->inode, crawl_data->pid); if (!fd) { gf_log (this->name, GF_LOG_ERROR, "Failed to create fd for %s", dirloc->path); ret = -1; goto out; } ret = syncop_opendir (readdir_xl, dirloc, fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "opendir failed on %s", dirloc->path); goto out; } } else { fd = fd_anonymous (dirloc->inode); } ret = 0; out: if (!ret) *dirfd = fd; return ret; } xlator_t* afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data) { afr_private_t *priv = this->private; if (crawl_data->crawl == FULL) { return this; } else { return priv->children[crawl_data->child]; } return NULL; } int afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, gf_dirent_t *entry, afr_crawl_data_t *crawl_data) { int ret = 0; afr_private_t *priv = NULL; priv = this->private; if (crawl_data->crawl == FULL) { ret = afr_build_child_loc (this, child, parent, entry->d_name); } else { child->path = ""; child->inode = inode_new (priv->root_inode->table); uuid_parse (entry->d_name, child->gfid); } return ret; } gf_boolean_t _crawl_proceed (xlator_t *this, int child) { afr_private_t *priv = this->private; gf_boolean_t proceed = _gf_false; if (!priv->child_up[child]) { gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s " ", subvol went down", priv->children[child]->name); goto out; } if (afr_up_children_count (priv->child_up, priv->child_count) < 2) { gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " "< 2 children are up"); goto out; } proceed = _gf_true; out: return proceed; } static int _build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) { int ret = 0; uuid_copy (loc->pargfid, parent->inode->gfid); loc->path = ""; loc->name = name; loc->parent = inode_ref (parent->inode); if (!loc->parent) { loc->path = NULL; loc_wipe (loc); ret = -1; } return ret; } void _index_crawl_post_lookup_fop (xlator_t *this, loc_t *parentloc, gf_dirent_t *entry, int op_ret, int op_errno, xlator_t *readdir_xl) { loc_t index_loc = {0}; int ret = 0; if (op_ret && (op_errno == ENOENT)) { ret = _build_index_loc (this, &index_loc, entry->d_name, parentloc); if (ret) goto out; gf_log (this->name, GF_LOG_INFO, "Removing stale index " "for %s on %s", index_loc.name, readdir_xl->name); ret = syncop_unlink (readdir_xl, &index_loc); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove" " index on %s - %s", index_loc.name, readdir_xl->name, strerror (errno)); } index_loc.path = NULL; loc_wipe (&index_loc); } out: return; } static int _perform_self_heal (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, off_t *offset, afr_crawl_data_t *crawl_data, xlator_t *readdir_xl) { gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; struct iatt iatt = {0}; struct iatt parent = {0}; int ret = 0; loc_t entry_loc = {0}; fd_t *fd = NULL; list_for_each_entry_safe (entry, tmp, &entries->list, list) { if (!_crawl_proceed (this, crawl_data->child)) { ret = -1; goto out; } *offset = entry->d_off; if (IS_ENTRY_CWD (entry->d_name) || IS_ENTRY_PARENT (entry->d_name)) continue; if ((crawl_data->crawl == FULL) && uuid_is_null (entry->d_stat.ia_gfid)) { gf_log (this->name, GF_LOG_WARNING, "%s/%s: No " "gfid present skipping", parentloc->path, entry->d_name); continue; } loc_wipe (&entry_loc); ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc, entry, crawl_data); if (ret) goto out; if (uuid_is_null (entry_loc.gfid)) { gf_log (this->name, GF_LOG_WARNING, "failed to build " "location for %s", entry->d_name); continue; } if (entry_loc.path) gf_log (this->name, GF_LOG_DEBUG, "lookup %s", entry_loc.path); else gf_log (this->name, GF_LOG_DEBUG, "lookup %s", uuid_utoa (entry_loc.gfid)); ret = syncop_lookup (this, &entry_loc, NULL, &iatt, NULL, &parent); if (crawl_data->crawl == INDEX) { _index_crawl_post_lookup_fop (this, parentloc, entry, ret, errno, readdir_xl); entry_loc.path = NULL; loc_wipe (&entry_loc); continue; } //Don't fail the crawl if lookup fails as it //could be because of split-brain if (ret || (!IA_ISDIR (iatt.ia_type))) continue; inode_link (entry_loc.inode, parentloc->inode, NULL, &iatt); ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc, readdir_xl); if (ret) continue; ret = _crawl_directory (fd, &entry_loc, crawl_data, readdir_xl); fd_unref (fd); } ret = 0; out: if (entry_loc.path) loc_wipe (&entry_loc); return ret; } static int _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data, xlator_t *readdir_xl) { xlator_t *this = NULL; off_t offset = 0; gf_dirent_t entries; int ret = 0; gf_boolean_t free_entries = _gf_false; INIT_LIST_HEAD (&entries.list); this = THIS; GF_ASSERT (loc->inode); if (loc->path) gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); else gf_log (this->name, GF_LOG_DEBUG, "crawling %s", uuid_utoa (loc->gfid)); while (1) { if (crawl_data->crawl == FULL) ret = syncop_readdirp (readdir_xl, fd, 131072, offset, NULL, &entries); else ret = syncop_readdir (readdir_xl, fd, 131072, offset, &entries); if (ret <= 0) break; ret = 0; free_entries = _gf_true; if (!_crawl_proceed (this, crawl_data->child)) { ret = -1; goto out; } if (list_empty (&entries.list)) goto out; ret = _perform_self_heal (this, loc, &entries, &offset, crawl_data, readdir_xl); gf_dirent_free (&entries); free_entries = _gf_false; } ret = 0; out: if (free_entries) gf_dirent_free (&entries); return ret; } static char* position_str_get (afr_child_pos_t pos) { switch (pos) { case AFR_POS_UNKNOWN: return "unknown"; case AFR_POS_LOCAL: return "local"; case AFR_POS_REMOTE: return "remote"; } return NULL; } int afr_find_child_position (xlator_t *this, int child) { afr_private_t *priv = NULL; dict_t *xattr_rsp = NULL; loc_t loc = {0}; int ret = 0; gf_boolean_t local = _gf_false; char *pathinfo = NULL; afr_child_pos_t *pos = NULL; priv = this->private; pos = &priv->shd.pos[child]; if (!priv->root_inode) { LOCK (&priv->lock); { if (!priv->root_inode) priv->root_inode = inode_ref (this->itable->root); } UNLOCK (&priv->lock); } afr_build_root_loc (this, &loc); ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, GF_XATTR_PATHINFO_KEY); if (ret) { gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child " "%d", child); goto out; } ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on " "child %d", child); goto out; } ret = afr_local_pathinfo (pathinfo, &local); if (ret) goto out; if (local) *pos = AFR_POS_LOCAL; else *pos = AFR_POS_REMOTE; gf_log (this->name, GF_LOG_INFO, "child %s is %s", priv->children[child]->name, position_str_get (*pos)); out: if (ret) *pos = AFR_POS_UNKNOWN; loc_wipe (&loc); return ret; } static int afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) { GF_FREE (data); STACK_DESTROY (sync_frame->root); return 0; } static inline int afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) { return (shd->pos[child] == AFR_POS_LOCAL); } static int afr_crawl_directory (xlator_t *this, afr_crawl_data_t *crawl_data) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; loc_t dirloc = {0}; gf_boolean_t crawl = _gf_false; int ret = 0; xlator_t *readdir_xl = NULL; fd_t *fd = NULL; int child = -1; priv = this->private; shd = &priv->shd; child = crawl_data->child; LOCK (&priv->lock); { if (shd->inprogress[child]) { shd->pending[child] = _gf_true; } else { shd->inprogress[child] = _gf_true; crawl = _gf_true; } } UNLOCK (&priv->lock); if (!priv->root_inode) { ret = -1; goto out; } if (!crawl) { gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress " "for %s", priv->children[child]->name); goto out; } do { readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); if (!readdir_xl) goto done; ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc, readdir_xl); if (ret) goto done; ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc, readdir_xl); if (ret) goto done; ret = _crawl_directory (fd, &dirloc, crawl_data, readdir_xl); if (ret) gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s", readdir_xl->name); else gf_log (this->name, GF_LOG_INFO, "Crawl completed " "on %s", readdir_xl->name); fd_unref (fd); fd = NULL; done: LOCK (&priv->lock); { if (shd->pending[child]) { shd->pending[child] = _gf_false; } else { shd->inprogress[child] = _gf_false; crawl = _gf_false; } } UNLOCK (&priv->lock); if (crawl_data->crawl == INDEX) { dirloc.path = NULL; loc_wipe (&dirloc); } } while (crawl); out: if (fd) fd_unref (fd); if (crawl_data->crawl == INDEX) { dirloc.path = NULL; loc_wipe (&dirloc); } return ret; } static int afr_crawl (void *data) { xlator_t *this = NULL; afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; int ret = -1; afr_crawl_data_t *crawl_data = data; this = THIS; priv = this->private; shd = &priv->shd; if (!_crawl_proceed (this, crawl_data->child)) goto out; ret = afr_find_child_position (this, crawl_data->child); if (ret) goto out; if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) goto out; ret = afr_crawl_directory (this, crawl_data); out: return ret; } void afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; call_frame_t *frame = NULL; afr_crawl_data_t *crawl_data = NULL; int ret = 0; priv = this->private; shd = &priv->shd; if (!shd->enabled) goto out; frame = create_frame (this, this->ctx->pool); if (!frame) goto out; afr_set_lk_owner (frame, this); afr_set_low_priority (frame); crawl_data = GF_CALLOC (1, sizeof (*crawl_data), gf_afr_mt_afr_crawl_data_t); if (!crawl_data) goto out; crawl_data->child = idx; crawl_data->pid = frame->root->pid; crawl_data->crawl = crawl; gf_log (this->name, GF_LOG_INFO, "starting crawl for %s", priv->children[idx]->name); ret = synctask_new (this->ctx->env, afr_crawl, afr_crawl_done, frame, crawl_data); if (ret) gf_log (this->name, GF_LOG_ERROR, "Could not create the " "task for %d ret %d", idx, ret); out: return; } //void //afr_full_self_heal (xlator_t *this) //{ // int i = 0; // afr_private_t *priv = this->private; // // for (i = 0; i < priv->child_count; i++) // afr_start_crawl (this, i, FULL); //} void afr_build_root_loc (xlator_t *this, loc_t *loc) { afr_private_t *priv = NULL; priv = this->private; loc->path = gf_strdup ("/"); loc->name = ""; loc->inode = inode_ref (priv->root_inode); uuid_copy (loc->gfid, loc->inode->gfid); } int afr_set_root_gfid (dict_t *dict) { uuid_t gfid; int ret = 0; memset (gfid, 0, 16); gfid[15] = 1; ret = afr_set_dict_gfid (dict, gfid); return ret; }