diff options
Diffstat (limited to 'xlators')
27 files changed, 877 insertions, 677 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4c8fa31b679..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,6 +45,57 @@ afr_quorum_errno(afr_private_t *priv)
     return ENOTCONN;
 }
 
+/*
+ * Tell whether the entry 'name' under the directory 'pargfid' is an
+ * AFR-internal ("private") directory for the client identified by 'pid'.
+ *
+ * Only entries directly under the root gfid can be private:
+ *  - GF_REPLICATE_TRASH_DIR is private for every client;
+ *  - nothing else is private for geo-rep (GF_CLIENT_PID_GSYNCD);
+ *  - for heal clients (GF_CLIENT_PID_GLFS_HEAL, GF_CLIENT_PID_SELF_HEALD)
+ *    only priv->anon_inode_name is private;
+ *  - for all other clients any name starting with AFR_ANON_DIR_PREFIX is
+ *    private.
+ *
+ * Returns _gf_true if the entry is private, _gf_false otherwise.
+ */
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+                         pid_t pid)
+{
+    /* A NULL name cannot match any private directory; bail out before the
+     * string comparisons below dereference it. */
+    if (!name || !__is_root_gfid(pargfid)) {
+        return _gf_false;
+    }
+
+    if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+        /*For backward compatibility /.landfill is private*/
+        return _gf_true;
+    }
+
+    if (pid == GF_CLIENT_PID_GSYNCD) {
+        /*geo-rep needs to create/sync private directory on slave because
+         * it appears in changelog*/
+        return _gf_false;
+    }
+
+    if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+        if (strcmp(name, priv->anon_inode_name) == 0) {
+            /* anonymous-inode dir is private*/
+            return _gf_true;
+        }
+    } else {
+        if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+            0) {
+            /* anonymous-inode dir prefix is private for geo-rep to work*/
+            return _gf_true;
+        }
+    }
+
+    return _gf_false;
+}
+
 void
 afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
                          unsigned char *replies)
@@ -2271,8 +2322,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
          * need is a low probability that multiple clients
          * won't converge on the same subvolume.
*/ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -3978,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5660,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6653,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f69013f3e0a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, 
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f35c41df274..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; 
+ int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, 
frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char 
*lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, 
GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = 
syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). 
source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set 
to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dd40c57ab12..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], 
&loc, 1, NULL, NULL);
-                break;
-            default:
-                gf_msg(this->name, GF_LOG_WARNING, 0,
-                       AFR_MSG_EXPUNGING_FILE_OR_DIR,
-                       "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
-                       bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
-                       priv->children[i]->name);
-
-                ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
-                break;
-        }
+        ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+                                         replies);
     }
 
-    loc_wipe(&loc);
-
     return ret;
 }
 
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 7a038fa7fe3..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -369,4 +369,9 @@ gf_boolean_t
 afr_is_file_empty_on_all_children(afr_private_t *priv,
                                   struct afr_reply *replies);
 
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+                          inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
 #endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index f2e08908c24..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -222,7 +222,7 @@ out:
 }
 
 int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
                     ia_type_t type)
 {
     int ret = 0;
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     ret = afr_shd_selfheal(healer, healer->subvol, gfid);
 
     if (ret == -ENOENT || ret == -ESTALE)
-        afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+        afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
 
     if (ret == 2)
         /* If bricks crashed in pre-op after creating indices/xattrop
@@ -843,6 +843,198 @@ out:
     return need_heal;
 }
 
+/*
+ * Per-entry callback handed to syncop_mt_dir_scan() by
+ * afr_cleanup_anon_inode_dir(); 'entry' is a name found under 'parent' on
+ * 'subvol'.
+ *
+ * entry->d_name is parsed as a gfid (names that do not parse are skipped)
+ * and looked up on every brick. The entry is then purged
+ *  - from 'subvol' only, if exactly one brick has the gfid, or if it is not
+ *    a directory and has more than one link on brick healer->subvol;
+ *  - from all bricks, if every brick that has the gfid also has the entry
+ *    under 'parent'.
+ * The entry is left alone unless all bricks are up and every lookup either
+ * succeeds or fails with ENOENT/ESTALE.
+ *
+ * Returns 0 when the entry was purged or skipped, a negative value on error.
+ */
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                           void *data)
+{
+    struct subvol_healer *healer = data;
+    afr_private_t *priv = healer->this->private;
+    call_frame_t *frame = NULL;
+    afr_local_t *local = NULL;
+    int ret = 0;
+    loc_t loc = {0};
+    int count = 0;
+    int i = 0;
+    int op_errno = 0;
+    struct iatt *iatt = NULL;
+    gf_boolean_t multiple_links = _gf_false;
+    unsigned char *gfid_present = alloca0(priv->child_count);
+    unsigned char *entry_present = alloca0(priv->child_count);
+    char *type = "file";
+
+    frame = afr_frame_create(healer->this, &ret);
+    if (!frame) {
+        ret = -ret;
+        goto out;
+    }
+    local = frame->local;
+    if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+        gf_msg_debug(healer->this->name, 0,
+                     "Not all bricks are up. Skipping "
+                     "cleanup of %s on %s",
+                     entry->d_name, subvol->name);
+        ret = 0;
+        goto out;
+    }
+
+    loc.inode = inode_new(parent->inode->table);
+    if (!loc.inode) {
+        ret = -ENOMEM;
+        goto out;
+    }
+    ret = gf_uuid_parse(entry->d_name, loc.gfid);
+    if (ret) {
+        ret = 0;
+        goto out;
+    }
+    AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+               NULL);
+    for (i = 0; i < priv->child_count; i++) {
+        if (local->replies[i].op_ret == 0) {
+            count++;
+            gfid_present[i] = 1;
+            iatt = &local->replies[i].poststat;
+            if (iatt->ia_type == IA_IFDIR) {
+                type = "dir";
+            }
+
+            if (i == healer->subvol) {
+                if (local->replies[i].poststat.ia_nlink > 1) {
+                    multiple_links = _gf_true;
+                }
+            }
+        } else if (local->replies[i].op_errno != ENOENT &&
+                   local->replies[i].op_errno != ESTALE) {
+            /*We don't have complete view. Skip the entry*/
+            gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+                         "Skipping cleanup of %s on %s", entry->d_name,
+                         subvol->name);
+            ret = 0;
+            goto out;
+        }
+    }
+
+    /*Inode is deleted from subvol. iatt is still NULL if no brick returned
+     * the gfid (count == 0), so check it before dereferencing.*/
+    if (count == 1 || (iatt && iatt->ia_type != IA_IFDIR && multiple_links)) {
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+               AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+               priv->anon_inode_name, entry->d_name, subvol->name);
+        ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+                                  iatt->ia_type);
+        if (ret == -ENOENT || ret == -ESTALE)
+            ret = 0;
+    } else if (count > 1) {
+        loc_wipe(&loc);
+        loc.parent = inode_ref(parent->inode);
+        loc.name = entry->d_name;
+        loc.inode = inode_new(parent->inode->table);
+        if (!loc.inode) {
+            ret = -ENOMEM;
+            goto out;
+        }
+        AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+                   &loc, NULL);
+        count = 0;
+        for (i = 0; i < priv->child_count; i++) {
+            if (local->replies[i].op_ret == 0) {
+                count++;
+                entry_present[i] = 1;
+                iatt = &local->replies[i].poststat;
+            } else if (local->replies[i].op_errno != ENOENT &&
+                       local->replies[i].op_errno != ESTALE) {
+                /*We don't have complete view. Skip the entry*/
+                gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+                             "Skipping cleanup of %s on %s", entry->d_name,
+                             subvol->name);
+                ret = 0;
+                goto out;
+            }
+        }
+        for (i = 0; i < priv->child_count; i++) {
+            if (gfid_present[i] && !entry_present[i]) {
+                /*Entry is not anonymous on at least one subvol*/
+                gf_msg_debug(healer->this->name, 0,
+                             "Valid entry present on %s "
+                             "Skipping cleanup of %s on %s",
+                             priv->children[i]->name, entry->d_name,
+                             subvol->name);
+                ret = 0;
+                goto out;
+            }
+        }
+
+        gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+               AFR_MSG_EXPUNGING_FILE_OR_DIR,
+               "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+               entry->d_name);
+        ret = 0;
+        for (i = 0; i < priv->child_count; i++) {
+            op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+                                            entry->d_name, iatt->ia_type);
+            if (op_errno != ENOENT && op_errno != ESTALE) {
+                ret |= -op_errno;
+            }
+        }
+    }
+
+out:
+    if (frame)
+        AFR_STACK_DESTROY(frame);
+    loc_wipe(&loc);
+    return ret;
+}
+
+/*
+ * Resolve the anonymous-inode directory through afr_anon_inode_create() and
+ * scan it on brick healer->subvol with afr_shd_anon_inode_cleaner() as the
+ * per-entry callback. Errors are not reported to the caller.
+ */
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+    int ret = 0;
+    call_frame_t *frame = NULL;
+    afr_private_t *priv = healer->this->private;
+    loc_t loc = {0};
+
+    ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+    if (ret)
+        goto out;
+
+    frame = afr_frame_create(healer->this, &ret);
+    if (!frame) {
+        ret = -ret;
+        goto out;
+    }
+
+    ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+                             GF_CLIENT_PID_SELF_HEALD, healer,
+                             afr_shd_anon_inode_cleaner, NULL,
+                             priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+    if (frame)
+        AFR_STACK_DESTROY(frame);
+    loc_wipe(&loc);
+    return;
+}
+
 void *
 afr_shd_index_healer(void *data)
 {
@@ -900,6 +1092,10 @@ afr_shd_index_healer(void *data)
             sleep(1);
         } while (ret > 0);
 
+        if (ret == 0) {
+            afr_cleanup_anon_inode_dir(healer);
+        }
+
         if (ret == 0 && pre_crawl_xdata &&
             !healer->crawl_event.heal_failed_count) {
             afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
diff --git 
a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 687c28e6472..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
                      char **path_p);
 
 int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
                     ia_type_t type);
 #endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 67e0a4d10be..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,38 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
     }
 }
 
+/*
+ * Derive the anonymous-inode directory name and gfid from the "volume-id"
+ * option:
+ *  - priv->anon_inode_name becomes AFR_ANON_DIR_PREFIX "-" <volume-id>;
+ *  - priv->anon_gfid_str becomes the volume-id, parsed as a uuid, with the
+ *    lowest bit of its first byte flipped, in string form.
+ * Both are set only once (while anon_inode_name is still empty); nothing is
+ * done when "volume-id" is missing from 'options'.
+ */
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+    char *volfile_id_str = NULL;
+    uuid_t anon_inode_gfid = {0};
+
+    /*If volume id is not present don't enable anything*/
+    if (dict_get_str(options, "volume-id", &volfile_id_str))
+        return;
+    /* The name is "<prefix>-<volume-id>": account for the '-' separator too */
+    GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + 1 + strlen(volfile_id_str) <=
+              NAME_MAX);
+    /*anon_inode_name is not supposed to change once assigned*/
+    if (!priv->anon_inode_name[0]) {
+        snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+                 AFR_ANON_DIR_PREFIX, volfile_id_str);
+        gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+        /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+        anon_inode_gfid[0] ^= 1;
+        uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+    }
+}
+
 int
 reconfigure(xlator_t *this, dict_t *options)
 {
@@ -290,6 +322,10 @@ reconfigure(xlator_t *this, dict_t *options)
         consistent_io = _gf_false;
     priv->consistent_io = consistent_io;
 
+    afr_handle_anon_inode_options(priv, options);
+
+    GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+                     out);
     if (priv->shd.enabled) {
         if ((priv->shd.enabled != enabled_old) ||
             (timeout_old != priv->shd.timeout))
@@ -541,7 +577,9 @@ init(xlator_t 
*this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -553,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -561,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1286,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e60708accf..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t 
*child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame); void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index d874415699c..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -382,7 +382,7 @@ out: /* Code to save hashed subvol on inode ctx as a mds subvol */ -static int +int dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol) { dht_inode_ctx_t *ctx = NULL; @@ -2161,31 +2161,18 @@ static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } static int32_t @@ -4314,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy 
= gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4604,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -5893,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. 
So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -6720,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -10855,15 +10818,12 @@ dht_notify(xlator_t *this, int event, void *data, ...) gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -11086,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. 
*/ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -11239,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. 
It is valid only for for ops on files @@ -11295,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 07bbe204997..fe0dc3db34a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,7 +24,6 @@ #define _DHT_H #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" -#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" #define DHT_MDS_STR "mds" #define GF_DHT_LOOKUP_UNHASHED_OFF 0 @@ -36,7 +35,6 @@ #define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal" /* Namespace synchronization */ #define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync" -#define TIERING_MIGRATION_KEY "tiering.migration" #define DHT_LAYOUT_HASH_INVALID 1 #define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN) @@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem, dht_layout_t **ondisk); -typedef struct { - uint64_t blocks_used; - uint64_t pblocks_used; - uint64_t files_used; - uint64_t pfiles_used; - uint64_t unhashed_blocks_used; - uint64_t unhashed_pblocks_used; - uint64_t unhashed_files_used; - uint64_t unhashed_pfiles_used; - uint64_t unhashed_fsid; - uint64_t hashed_fsid; -} tier_statvfs_t; - struct dht_local { loc_t loc; loc_t loc2; @@ -272,7 +257,6 @@ struct dht_local { struct iatt preparent; struct iatt postparent; struct statvfs statvfs; - tier_statvfs_t tier_statvfs; fd_t *fd; inode_t *inode; dict_t *params; @@ -405,14 +389,7 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS = 1 + 2, GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, GF_DEFRAG_CMD_START_FORCE = 1 + 4, - GF_DEFRAG_CMD_START_TIER = 1 + 5, - 
GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, - GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, - GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, - GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11, - GF_DEFRAG_CMD_STOP_TIER = 1 + 12, GF_DEFRAG_CMD_DETACH_START = 1 + 13, GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14, GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15, @@ -463,75 +440,6 @@ struct dht_container { int local_subvol_index; }; -typedef enum tier_mode_ { - TIER_MODE_NONE = 0, - TIER_MODE_TEST, - TIER_MODE_WM -} tier_mode_t; - -typedef enum tier_pause_state_ { - TIER_RUNNING = 0, - TIER_REQUEST_PAUSE, - TIER_PAUSED -} tier_pause_state_t; - -/* This Structure is only used in tiering fixlayout */ -typedef struct gf_tier_fix_layout_arg { - xlator_t *this; - dict_t *fix_layout; - pthread_t thread_id; -} gf_tier_fix_layout_arg_t; - -typedef struct gf_tier_conf { - int is_tier; - int watermark_hi; - int watermark_low; - int watermark_last; - unsigned long block_size; - fsblkcnt_t blocks_total; - fsblkcnt_t blocks_used; - uint64_t max_migrate_bytes; - int max_migrate_files; - int query_limit; - tier_mode_t mode; - int percent_full; - /* These flags are only used for tier-compact */ - gf_boolean_t compact_active; - /* These 3 flags are set to true when the client changes the */ - /* compaction mode on the command line. */ - /* When they are set, the daemon will trigger compaction as */ - /* soon as possible to activate or deactivate compaction. */ - /* If in the middle of a compaction, then the switches take */ - /* effect on the next compaction, not the current one. */ - /* If the user switches it off, we want to avoid needless */ - /* compactions. */ - /* If the user switches it on, they want to compact as soon */ - /* as possible. 
*/ - gf_boolean_t compact_mode_switched; - gf_boolean_t compact_mode_switched_hot; - gf_boolean_t compact_mode_switched_cold; - int tier_max_promote_size; - int tier_promote_frequency; - int tier_demote_frequency; - int tier_compact_hot_frequency; - int tier_compact_cold_frequency; - uint64_t st_last_promoted_size; - uint64_t st_last_demoted_size; - struct synctask *pause_synctask; - gf_timer_t *pause_timer; - pthread_mutex_t pause_mutex; - int promote_in_progress; - int demote_in_progress; - /* This Structure is only used in tiering fixlayout */ - gf_tier_fix_layout_arg_t tier_fix_layout_arg; - /* Indicates the index of the first queryfile picked - * in the last cycle of promote or demote */ - int32_t last_promote_qfile_index; - int32_t last_demote_qfile_index; - tier_pause_state_t pause_state; - char volname[GD_VOLUME_NAME_MAX + 1]; -} gf_tier_conf_t; - typedef struct nodeuuid_info { char info; /* Set to 1 is this is my node's uuid*/ uuid_t uuid; /* Store the nodeuuid as well for debugging*/ @@ -563,13 +471,6 @@ struct gf_defrag_info_ { uint32_t new_commit_hash; gf_defrag_status_t defrag_status; gf_defrag_pattern_list_t *defrag_pattern; - gf_tier_conf_t tier_conf; - - /*Data Tiering params for scanner*/ - uint64_t total_files_promoted; - uint64_t total_files_demoted; - int write_freq_threshold; - int read_freq_threshold; pthread_cond_t parallel_migration_cond; pthread_mutex_t dfq_mutex; @@ -605,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t; struct dht_methods_s { int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local); int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag); - int32_t (*migration_needed)(xlator_t *this); xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout, const char *name); }; @@ -1316,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout); int dht_refresh_layout(call_frame_t *frame); -gf_boolean_t -dht_is_tier_xlator(xlator_t *this); - int dht_build_parent_loc(xlator_t *this, loc_t 
*parent, loc_t *child, int32_t *op_errno); @@ -1482,4 +1379,6 @@ dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); int dht_dir_layout_error_check(xlator_t *this, inode_t *inode); +int +dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol); #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index eda2491e0ff..2f23ce90fbd 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1(postbuf)) { - if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new(); - if (!local->xattr_req) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32(local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "insufficient memory"); local->op_errno = ENOMEM; local->op_ret = -1; goto out; } } + ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + dht_iatt_merge(this, &local->stbuf, postbuf); dht_iatt_merge(this, &local->prebuf, prebuf); diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 33f9832395b..e3c4471334a 100644 --- 
a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -30,10 +30,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, - gf_tier_mt_bricklist_t, - gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, - gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, gf_dht_nodeuuids_t, gf_dht_mt_end diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 026879e14af..601f8dad78b 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -38,12 +38,11 @@ GLFS_MSGID( DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED, DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES, DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED, - DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR, - DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED, - DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED, - DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED, - DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO, - DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, + DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, + DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED, + DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED, + DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, + DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED, DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED, DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED, @@ -69,8 +68,7 @@ GLFS_MSGID( DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED, DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR, DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO, - DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR, - DHT_MSG_TIER_PAUSED, 
DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED, + DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED, DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED, DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED, DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED, @@ -96,15 +94,13 @@ GLFS_MSGID( DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED, DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL, DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED, - DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, - DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP, - DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED, - DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE, - DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED, - DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, - DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE, - DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, - DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR, + DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP, + DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, + DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED, + DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED, + DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, + DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED, + DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR, DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR, DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED, DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED, @@ -180,7 +176,6 @@ GLFS_MSGID( "adding bricks" #define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file" #define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory" -#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused" #define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr" 
#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode" #define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination" @@ -222,17 +217,14 @@ GLFS_MSGID( #define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present" #define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed" #define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup" -#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed" #define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed" #define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping" #define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout" #define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed" #define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed" -#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started" #define DHT_MSG_FIX_NOT_COMP_STR \ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \ "complete" -#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr" #define DHT_MSG_SUBVOL_DETER_FAILED_STR \ "local subvolume determination failed with error" #define DHT_MSG_LOCAL_SUBVOL_STR "local subvol" @@ -248,8 +240,6 @@ GLFS_MSGID( #define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \ "Failed to initialise migration queue" #define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance" -#define DHT_MSG_TIER_RESUME_STR "Pause end. 
Resume tiering" -#define DHT_MSG_TIER_PAUSED_STR "Pause tiering" #define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout" #define DHT_MSG_WOKE_STR "woken" #define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance" diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 358e1c6d533..8ba8082bd86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -609,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - if (!!dht_is_tier_xlator(this)) { - xdata = dict_new(); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", - loc->path); - goto out; - } + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed)", loc->path); + goto out; + } - ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", loc->path, - GF_CLEAN_WRITE_PROTECTION); - goto out; - } + ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; } ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); @@ -1096,7 +1093,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, break; } - if (!conf->force_migration && !dht_is_tier_xlator(this)) { + if (!conf->force_migration) { if (!xdata) { xdata = dict_new(); if (!xdata) { @@ -1536,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* If defrag is NULL, it 
should be assumed that migration is triggered - * from client using the trusted.distribute.migrate-data virtual xattr - */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator(this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - gf_log(this->name, log_level, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -2300,14 +2282,12 @@ out: } } - if (!dht_is_tier_xlator(this)) { - lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, + NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } if (dict) @@ -3073,7 +3053,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i, dht_conf_t *conf, gf_defrag_info_t *defrag, fd_t *fd, dict_t *migrate_data, struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) + int *perrno) { int ret = 0; char is_linkfile = 0; @@ -3277,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int dfc_index = 0; int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; + xlator_t *old_THIS = NULL; gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); gettimeofday(&dir_start, NULL); @@ -3290,6 +3270,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } + old_THIS = THIS; + THIS = this; + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); if 
(!dir_dfmeta) { gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); @@ -3454,7 +3437,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, defrag, dir_dfmeta->lfd[dfc_index], migrate_data, dir_dfmeta, xattr_req, - &should_commit_hash, perrno); + perrno); if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { goto out; @@ -3505,16 +3488,12 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, loc->path, elapsed / 1e6); ret = 0; out: - + THIS = old_THIS; gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); if (xattr_req) dict_unref(xattr_req); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - /* It does not matter if it errored out - this number is * used to calculate rebalance estimated time to complete. * No locking required as dirs are processed by a single thread. @@ -3522,6 +3501,7 @@ out: defrag->num_dirs_processed++; return ret; } + int gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout) @@ -3536,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, * rebalance is complete. */ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || - defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { return 0; } @@ -3582,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, return 0; } -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. 
This is not performant, adding more time to - * fixlayout. The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ -int -gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, - gf_dirent_t *file_dentry) -{ - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - - GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); - goto out; - } - - conf = this->private; - - loc_wipe(&file_loc); - - if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null(parent_loc->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", - parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - - ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new(); - if (!lookup_xdata) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", file_dentry->d_name); - goto out; - } - - ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set 
lookup flag"); - goto out; - } - - gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, - lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; - -out: - - loc_wipe(&file_loc); - - if (lookup_xdata) - dict_unref(lookup_xdata); - - return ret; -} - int gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3709,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; - int should_commit_hash = 1; int perrno = 0; conf = this->private; @@ -3812,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; if (!IA_ISDIR(entry->d_stat.ia_type)) { - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. 
- * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup(this, loc, entry); - } - continue; } loc_wipe(&entry_loc); @@ -3838,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } else { - should_commit_hash = 0; - continue; } } @@ -3902,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } else { - should_commit_hash = 0; continue; } } @@ -3920,7 +3777,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - if (ret && ret != 2) { + if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, "Fix layout failed for %s", entry_loc.path); @@ -3987,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - if (ret && (ret != 2)) { + if (ret) { if (perrno == ENOENT || perrno == ESTALE) { ret = 0; goto out; @@ -4007,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (conf->decommission_in_progress) { goto out; } - - should_commit_hash = 0; } - } else if (ret == 2) { - should_commit_hash = 0; } } gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { defrag->total_failures++; gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, @@ -4042,10 +3893,6 @@ out: if (fd) fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - return ret; } @@ -4054,31 +3901,26 @@ dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dict_t *dict = NULL; - gf_defrag_info_t *defrag = 
NULL; uuid_t *uuid_ptr = NULL; int ret = -1; int i = 0; int j = 0; - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, - "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL, + NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } + if (!ret) + goto out; + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, NULL, NULL); if (ret) { @@ -4252,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) goto out; } - ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread, (void *)defrag, "dhtfcnt"); if (ret) { @@ -4309,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, /*Spawn Threads Here*/ while (index < thread_spawn_count) { - ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task, (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff); if (ret != 0) { gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. 
", @@ -4552,14 +4394,13 @@ gf_defrag_start_crawl(void *data) } ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data); - if (ret && ret != 2) { + if (ret) { defrag->total_failures++; ret = -1; goto out; } - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { defrag->total_failures++; ret = -1; goto out; @@ -4731,8 +4572,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; char *status = ""; double elapsed = 0; uint64_t time_to_complete = 0; @@ -4751,15 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; elapsed = gf_time() - defrag->start_time; /* The rebalance is still in progress */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { time_to_complete = gf_defrag_get_estimates_based_on_size(conf); if (time_to_complete && (time_to_complete > elapsed)) @@ -4774,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) if (!dict) goto log; - ret = dict_set_uint64(dict, "promoted", promoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); - - ret = dict_set_uint64(dict, "demoted", demoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); - ret = dict_set_uint64(dict, "files", files); if (ret) gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 1b6571cd43c..3e24065227c 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ 
b/xlators/cluster/dht/src/dht-selfheal.c @@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, local->call_cnt = conf->subvolume_cnt; if (op_ret < 0) { - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. - */ if (op_errno == EINVAL) { local->call_cnt = 1; dht_selfheal_dir_mkdir_lookup_done(frame, this); @@ -1330,9 +1326,11 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, int ret = -1; dht_local_t *local = NULL; xlator_t *this = NULL; + dht_conf_t *conf = NULL; local = frame->local; this = frame->this; + conf = this->private; local->selfheal.force_mkdir = force; local->selfheal.hole_cnt = 0; @@ -1372,15 +1370,44 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, return 0; } - if (local->hashed_subvol == NULL) - local->hashed_subvol = dht_subvol_get_hashed(this, loc); + /* MDS xattr is populated only while DHT is having more than one + subvol.In case of graph switch while adding more dht subvols need to + consider hash subvol as a MDS to avoid MDS check failure at the time + of running fop on directory + */ + if (!dict_get(local->xattr, conf->mds_xattr_key) && + (conf->subvolume_cnt > 1)) { + if (local->hashed_subvol == NULL) { + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", + loc->pargfid, "name=%s", loc->name, "path=%s", + loc->path, NULL); + goto err; + } + } + ret = dht_inode_ctx_mdsvol_set(local->inode, this, + local->hashed_subvol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s on inode vol is %s", + local->loc.path, + local->hashed_subvol ? 
local->hashed_subvol->name : "NULL"); + goto err; + } + } if (local->hashed_subvol == NULL) { - local->op_errno = EINVAL; - gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, - "name=%s", loc->name, "path=%s", loc->path, NULL); - goto err; + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, + "name=%s", loc->name, "path=%s", loc->path, NULL); + goto err; + } } local->current = &local->lock[0]; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 96caf40d0b1..bb72b0ffbb5 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, pattern_str = strtok_r(data, ",", &tmp_str); while (pattern_str) { dup_str = gf_strdup(pattern_str); + if (!dup_str) + goto out; pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); if (!pattern_list) { goto out; @@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this) methods = &(conf->methods); methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; methods->migration_other = NULL; methods->layout_search = dht_layout_search; @@ -1045,84 +1046,6 @@ struct volume_options dht_options[] = { /* NUFA option */ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, - /* tier options */ - { - .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { - .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { - .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { - .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - 
.default_value = "0", - }, - - { - .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "90", - }, - { - .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { - .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { - .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - {.key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system"}, - {.key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system"}, - { - .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { - .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { - .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, /* switch option */ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 59313639c45..3648a564840 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -595,7 +595,6 @@ nufa_init(xlator_t *this) dht_methods_t dht_methods = { .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, .layout_search = dht_layout_search, }; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 8d2b7f051da..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ 
b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, - BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED); + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); #define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" #define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" @@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, "daemon. Unwinding the fop" #define BRS_MSG_SIGN_PREPARE_FAIL_STR \ "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. Unwinding the fop" #define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" #define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" #define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 605a5e4c3e4..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int @@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, 
(void *)sbuf, size); } /** @@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - -dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -1025,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory 
will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index cda02dadc2c..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, char *save_ptr = NULL; tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + strtok_r(tmp_key, ":", &save_ptr); if (!*save_ptr) { if (tmp_key) diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index de932826c90..d94dceb10b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str) while (ptr) { *ptr = '-'; - ptr = strchr(str, '/'); + ptr = strchr(ptr, '/'); } } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index b96838e3367..90ef2cf4c9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2220,7 +2220,10 @@ retry: if (wait) { synclock_unlock(&priv->big_lock); + errno = 0; ret = runner_run(&runner); + if (errno != 0) + ret = errno; synclock_lock(&priv->big_lock); if (ret == EADDRINUSE) { @@ -6106,7 +6109,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, GF_ATOMIC_INC(conf->blockers); ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); - return ret; free_iobref: iobref_unref(iobref); @@ -6115,7 +6117,7 @@ maybe_free_iobuf: iobuf_unref(iobuf); } err: - return -1; + return ret; } extern size_t @@ -14818,6 +14820,10 @@ 
glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, i = 0; ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + if (*brick_count < sub_count) { + sub_count = *brick_count; + } + /* Check for bad brick order */ while (i < *brick_count) { ++i; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 4e5712e6447..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3810,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3851,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f74876eec9f..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, }, + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", 
+ .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 1cf55143e24..0e22fe411ee 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5899,7 +5899,9 @@ fuse_graph_sync(xlator_t *this) new_graph_id = priv->next_graph->id; priv->next_graph = NULL; need_first_lookup = 1; - priv->handle_graph_switch = _gf_true; + if (old_subvol) { + priv->handle_graph_switch = _gf_true; + } while (!priv->event_recvd) { ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex); @@ -5935,13 +5937,6 @@ unlock: if (winds_on_old_subvol == 0) { xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL); } - } else { - pthread_mutex_lock(&priv->sync_mutex); - { - priv->handle_graph_switch = _gf_false; - pthread_cond_broadcast(&priv->migrate_cond); - } - pthread_mutex_unlock(&priv->sync_mutex); } return 0; |