diff options
Diffstat (limited to 'xlators')
166 files changed, 9539 insertions, 4759 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 4ee83659c6e..032ab5c8001 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,7 +45,42 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } -static void +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + +void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) { @@ -885,7 +920,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, metadatamap |= (1 << index); } if (metadatamap_old != metadatamap) { - event = 0; + __afr_inode_need_refresh_set(inode, this); } break; @@ -898,7 +933,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, datamap |= (1 << index); } if (datamap_old != datamap) - event = 0; + __afr_inode_need_refresh_set(inode, this); break; default: @@ -1062,34 +1097,6 @@ out: } int -__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) -{ - int ret = -1; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - uint32_t event = 0; - uint64_t val = 0; - afr_inode_ctx_t *ctx = NULL; - - ret = __afr_inode_ctx_get(this, inode, &ctx); - if 
(ret) - return ret; - - val = ctx->read_subvol; - - metadatamap = (val & 0x000000000000ffff) >> 0; - datamap = (val & 0x00000000ffff0000) >> 16; - event = 0; - - val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | - (((uint64_t)event) << 32); - - ctx->read_subvol = val; - - return ret; -} - -int __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int *event_p) { @@ -1160,22 +1167,6 @@ out: } int -__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) -{ - afr_private_t *priv = NULL; - int ret = -1; - - priv = this->private; - - if (priv->child_count <= 16) - ret = __afr_inode_event_gen_reset_small(inode, this); - else - ret = -1; - - return ret; -} - -int afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int *event_p) { @@ -1241,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this, return 0; } -int +static int afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, int *spb_choice) { int ret = -1; - GF_VALIDATE_OR_GOTO(this->name, inode, out); LOCK(&inode->lock); @@ -1258,6 +1248,40 @@ out: return ret; } +/* + * frame is used to get the favourite policy. Since + * afr_inode_split_brain_choice_get was called with afr_open, it is possible to + * have a frame with out local->replies. So in that case, frame is passed as + * null, hence this function will handle the frame NULL case. 
+ */ +int +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol) +{ + int ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("afr", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out); + + priv = this->private; + + ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol); + if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) { + local = frame->local; + *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode, + NULL); + if (*spb_subvol >= 0) { + ret = 0; + } + } + +out: + return ret; +} int afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int event) @@ -1324,30 +1348,22 @@ out: return need_refresh; } -static int -afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) +int +__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) { int ret = -1; afr_inode_ctx_t *ctx = NULL; - GF_VALIDATE_OR_GOTO(this->name, inode, out); - - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get(this, inode, &ctx); - if (ret) - goto unlock; - + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret == 0) { ctx->need_refresh = _gf_true; } -unlock: - UNLOCK(&inode->lock); -out: + return ret; } int -afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) +afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) { int ret = -1; @@ -1355,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) LOCK(&inode->lock); { - ret = __afr_inode_event_gen_reset(inode, this); + ret = __afr_inode_need_refresh_set(inode, this); } UNLOCK(&inode->lock); out: @@ -1790,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) ret = afr_inode_get_readable(frame, inode, this, local->readable, &event_generation, local->transaction.type); - if (ret == -EIO || 
(local->is_read_txn && !event_generation)) { + if (ret == -EIO) { /* No readable subvolume even after refresh ==> splitbrain.*/ if (!priv->fav_child_policy) { err = EIO; @@ -2290,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv, * need is a low probability that multiple clients * won't converge on the same subvolume. */ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -2875,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = -1; + int spb_subvol = -1; int child_count = -1; if (*read_subvol != -1) @@ -2885,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, local = frame->local; child_count = priv->child_count; - afr_inode_split_brain_choice_get(local->inode, this, &spb_choice); - if ((spb_choice >= 0) && + afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol); + if ((spb_subvol >= 0) && (AFR_COUNT(success_replies, child_count) == child_count)) { - *read_subvol = spb_choice; + *read_subvol = spb_subvol; } else if (!priv->quorum_count || frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { *read_subvol = afr_first_up_child(frame, this); @@ -2929,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) 0, }; gf_boolean_t locked_entry = _gf_false; + gf_boolean_t in_flight_create = _gf_false; gf_boolean_t can_interpret = _gf_true; inode_t *parent = NULL; ia_type_t ia_type = IA_INVAL; @@ -2972,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (!replies[i].valid) continue; - if (locked_entry && replies[i].op_ret == -1 && - replies[i].op_errno == ENOENT) { - /* Second, check entry is still - "underway" in creation */ - local->op_ret = -1; - local->op_errno = ENOENT; - goto error; - } - - if (replies[i].op_ret == -1) + 
if (replies[i].op_ret == -1) { + if (locked_entry && replies[i].op_errno == ENOENT) { + in_flight_create = _gf_true; + } continue; + } if (read_subvol == -1 || !readable[read_subvol]) { read_subvol = i; @@ -2992,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) } } + if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { + local->op_ret = -1; + local->op_errno = ENOENT; + goto error; + } + if (read_subvol == -1) goto error; /* We now have a read_subvol, which is readable[] (if there @@ -3050,7 +3069,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (read_subvol == -1) goto cant_interpret; if (ret) { - afr_inode_event_gen_reset(local->inode, this); + afr_inode_need_refresh_set(local->inode, this); dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY); } } else { @@ -3103,7 +3122,7 @@ error: * others in that they must be given higher priority while * returning to the user. * - * The hierarchy is ENODATA > ENOENT > ESTALE > others + * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others */ int @@ -3115,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) return ENOENT; if (old_errno == ESTALE || new_errno == ESTALE) return ESTALE; + if (old_errno == ENOSPC || new_errno == ENOSPC) + return ENOSPC; return new_errno; } @@ -3606,6 +3627,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; afr_local_t *local = NULL; int read_subvol = -1; + int ret = 0; unsigned char *data_readable = NULL; unsigned char *success_replies = NULL; @@ -3627,7 +3649,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) if (!afr_has_quorum(success_replies, this, frame)) goto unwind; - afr_replies_interpret(frame, this, local->inode, NULL); + ret = afr_replies_interpret(frame, this, local->inode, NULL); + if (ret) { + afr_inode_need_refresh_set(local->inode, this); + } read_subvol = afr_read_subvol_decide(local->inode, this, NULL, data_readable); @@ -3679,7 +3704,7 @@ 
afr_ta_id_file_check(void *opaque) this = opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -3888,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd(loc->inode, this, event, - local->event_generation)) - afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); - else - afr_discover_do(frame, this, 0); + afr_discover_do(frame, this, 0); return 0; out: @@ -3993,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -4033,11 +4053,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd(loc->inode, this, event, - local->event_generation)) - afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); - else - afr_lookup_do(frame, this, 0); + afr_lookup_do(frame, this, 0); return 0; out: @@ -5679,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6655,6 +6672,8 @@ 
afr_priv_destroy(afr_private_t *priv) if (!priv) goto out; + + GF_FREE(priv->sh_domain); GF_FREE(priv->last_event); child_count = priv->child_count; @@ -6670,6 +6689,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 74f71fdc76a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -67,7 +67,8 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } int -afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -163,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -182,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -227,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, 
op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index e96b7d0798e..b7cceb79158 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -119,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) continue; if (local->replies[i].op_ret < 0) { if (local->inode) - afr_inode_event_gen_reset(local->inode, this); + afr_inode_need_refresh_set(local->inode, this); if (local->parent) - afr_inode_event_gen_reset(local->parent, this); + afr_inode_need_refresh_set(local->parent, this); if (local->parent2) - afr_inode_event_gen_reset(local->parent2, this); + afr_inode_need_refresh_set(local->parent2, this); continue; } @@ -345,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int pre_op_count = 0; int failed_count = 0; + unsigned char *success_replies = NULL; local = frame->local; priv = this->private; @@ -360,9 +361,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) failed_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); + /* FOP succeeded on all bricks. */ if (pre_op_count == priv->child_count && !failed_count) return; + /* FOP did not suceed on quorum no. of bricks. 
*/ + success_replies = alloca0(priv->child_count); + afr_fill_success_replies(local, priv, success_replies); + if (!afr_has_quorum(success_replies, this, NULL)) + return; + if (priv->thin_arbiter_count) { /*Mark new entry using ta file*/ local->is_new_entry = _gf_true; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index c01b4131d58..1d6e4f3570a 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -2506,6 +2506,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, call_frame_t *transaction_frame = NULL; int ret = -1; int32_t op_errno = ENOMEM; + int8_t last_fsync = 0; AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); transaction_frame = copy_frame(frame); @@ -2516,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - if (xdata) + if (xdata) { local->xdata_req = dict_copy_with_ref(xdata, NULL); - else + if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { + if (last_fsync) { + local->transaction.disable_delayed_post_op = _gf_true; + } + } + } else { local->xdata_req = dict_new(); + } if (!local->xdata_req) goto out; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index a5b004f4258..64856042b65 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = 0; + int spb_subvol = 0; int event_generation = 0; int ret = 0; int32_t op_errno = 0; @@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ret = afr_inode_get_readable(frame, local->inode, this, NULL, &event_generation, AFR_DATA_TRANSACTION); if ((ret < 0) && - (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) == - 0) && - 
spb_choice < 0) { + (afr_split_brain_read_subvol_get(local->inode, this, NULL, + &spb_subvol) == 0) && + spb_subvol < 0) { afr_inode_refresh(frame, this, local->inode, local->inode->gfid, afr_open_continue); } else { diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 772b59f9a2f..6fc2c75145c 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque) xdata_rsp = NULL; /* It doesn't. So query thin-arbiter to see if it blames any data brick. */ - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) int read_subvol = -1; inode_t *inode = NULL; int ret = -1; - int spb_choice = -1; + int spb_subvol = -1; local = frame->local; inode = local->inode; @@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) local->read_attempted[read_subvol] = 1; readfn: if (read_subvol == -1) { - ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice); - if ((ret == 0) && spb_choice >= 0) - read_subvol = spb_choice; + ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol); + if ((ret == 0) && spb_subvol >= 0) + read_subvol = spb_subvol; } if (read_subvol == -1) { diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index fdec66340ba..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -140,7 +140,7 @@ heal: } } out: - if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) { + if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) { ret = -afr_final_errno(local, priv); } loc_wipe(&loc); @@ -1909,7 +1909,8 @@ 
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, dict_t *dict = NULL; local = frame->local; - if (local && local->xattr_req) + + if (local->xattr_req) dict = local->xattr_req; return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, @@ -2749,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, 
anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = 
local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. 
So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on 
%s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ 
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, 
iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. 
*/ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dace07131cb..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } @@ -381,7 +352,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode, replies, gfid, locked_on, source, sources, 
is_gfid_absent, &gfid_idx); - if (ret) + if (ret || (gfid_idx < 0)) return ret; ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname, diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7a038fa7fe3..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -369,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 2219a53b277..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer) priv = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + priv->shd.timeout; + wait_till.tv_sec = gf_time() + priv->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) event->split_brain_count = 0; event->heal_failed_count = 0; - time(&event->start_time); + event->start_time = gf_time(); event->end_time = 0; _mask_cancellation(); } @@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer) event = &healer->crawl_event; shd = &(((afr_private_t *)healer->this->private)->shd); - time(&event->end_time); + event->end_time = gf_time(); history = gf_memdup(event, sizeof(*event)); event->start_time = 0; 
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. 
Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have 
complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, diff 
--git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 78438f91331..a51f79b1f43 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque) this = (xlator_t *)opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } @@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) local->transaction.pre_op_sources[j] = 0; } -gf_boolean_t -afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t *this = NULL; - gf_boolean_t fop_failed = _gf_false; - unsigned char *pre_op_sources = NULL; - int i = 0; - - local = frame->local; - this = frame->this; - priv = this->private; - pre_op_sources = local->transaction.pre_op_sources; - - /* If the fop failed on the brick, it is not a source. 
*/ - for (i = 0; i < priv->child_count; i++) - if (local->transaction.failed_subvols[i]) - pre_op_sources[i] = 0; - - switch (AFR_COUNT(pre_op_sources, priv->child_count)) { - case 1: - if (pre_op_sources[ARBITER_BRICK_INDEX]) - fop_failed = _gf_true; - break; - case 0: - fop_failed = _gf_true; - break; - } - - if (fop_failed) - return _gf_false; - - return _gf_true; -} - void afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) { @@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) priv->child_count) return _gf_false; - if (priv->arbiter_count) { - if (!afr_has_arbiter_fop_cbk_quorum(frame)) - need_dirty = _gf_true; - } else if (!afr_has_fop_cbk_quorum(frame)) { + if (!afr_has_fop_cbk_quorum(frame)) need_dirty = _gf_true; - } return need_dirty; } @@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) * no split-brain with the fix. The problem is eliminated completely. */ - if (priv->arbiter_count) { - if (afr_has_arbiter_fop_cbk_quorum(frame)) - return; - } else if (afr_has_fop_cbk_quorum(frame)) { + if (afr_has_fop_cbk_quorum(frame)) return; - } if (afr_need_dirty_marking(frame, this)) goto set_response; @@ -1073,7 +1029,7 @@ set_response: } int -afr_fill_ta_loc(xlator_t *this, loc_t *loc) +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop) { afr_private_t *priv = NULL; @@ -1081,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) loc->parent = inode_ref(priv->root_inode); gf_uuid_copy(loc->pargfid, loc->parent->gfid); loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; + if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) { + /* Except afr_ta_id_file_check() which is path based, all other gluster + * FOPS need gfid.*/ + return -EINVAL; + } gf_uuid_copy(loc->gfid, priv->ta_gfid); loc->inode = inode_new(loc->parent->table); if (!loc->inode) { @@ -1090,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) return 0; } -int -afr_changelog_thin_arbiter_post_op(xlator_t 
*this, afr_local_t *local) -{ - int ret = 0; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int failed_count = 0; - struct gf_flock flock = { - 0, - }; - loc_t loc = { - 0, - }; - int i = 0; - - priv = this->private; - if (!priv->thin_arbiter_count) - return 0; - - failed_count = AFR_COUNT(local->transaction.failed_subvols, - priv->child_count); - if (!failed_count) - return 0; - - GF_ASSERT(failed_count == 1); - ret = afr_fill_ta_loc(this, &loc); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", loc.name); - goto out; - } - - xattr = dict_new(); - if (!xattr) { - ret = -ENOMEM; - goto out; - } - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_static_bin(xattr, priv->pending_key[i], - local->pending[i], - AFR_NUM_CHANGE_LOGS * sizeof(int)); - if (ret) - goto out; - } - - flock.l_type = F_WRLCK; - flock.l_start = 0; - flock.l_len = 0; - - /*TODO: Convert to two domain locking. */ - ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL); - if (ret) - goto out; - - ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); - - if (ret == -EINVAL) { - gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB, - "Thin-arbiter has denied post-op on %s for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - - } else if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Post-op on thin-arbiter id file %s failed for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - } - flock.l_type = F_UNLCK; - syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY, - &loc, F_SETLK, &flock, NULL, NULL); -out: - if (xattr) - dict_unref(xattr); - - return ret; -} - static int afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque) { @@ -1264,9 +1145,9 
@@ afr_ta_post_op_do(void *opaque) this = local->transaction.frame->this; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } @@ -2466,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, goto out; } - if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { - /*Only allow writes but shard does [f]xattrops on writes, so + if (local->transaction.disable_delayed_post_op) { + goto out; + } + + if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && + (local->op != GF_FOP_FSYNC)) { + /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so * they are fine too*/ goto out; } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index a38489d9932..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -168,7 +189,8 @@ 
reconfigure(xlator_t *this, dict_t *options) bool, out); GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out); - gf_string2boolean(data_self_heal, &priv->data_self_heal); + if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1) + goto out; GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool, out); @@ -289,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -485,7 +511,8 @@ init(xlator_t *this) GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out); GF_OPTION_INIT("data-self-heal", data_self_heal, str, out); - gf_string2boolean(data_self_heal, &priv->data_self_heal); + if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1) + goto out; GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str, out); @@ -539,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -551,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -559,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + 
!priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1284,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 7f50a27e6c9..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t *child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -901,7 +908,7 @@ typedef struct _afr_local { gf_boolean_t uninherit_done; gf_boolean_t uninherit_value; - /* post-op hook */ + gf_boolean_t disable_delayed_post_op; } transaction; syncbarrier_t barrier; @@ -997,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, int event_generation); int -afr_inode_event_gen_reset(inode_t *inode, xlator_t *this); +__afr_inode_need_refresh_set(inode_t *inode, xlator_t 
*this); + +int +afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); int afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, @@ -1268,8 +1278,8 @@ int afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice); int -afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, - int *spb_choice); +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol); int afr_get_child_index_from_name(xlator_t *this, char *name); @@ -1354,7 +1364,7 @@ int afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode); int -afr_fill_ta_loc(xlator_t *this, loc_t *loc); +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop); int afr_ta_post_op_lock(xlator_t *this, loc_t *loc); @@ -1402,4 +1412,12 @@ afr_is_lock_mode_mandatory(dict_t *xdata); void afr_dom_lock_release(call_frame_t *frame); + +void +afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 2231af647de..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -17,6 +17,7 @@ #include <glusterfs/quota-common-utils.h> #include <glusterfs/upcall-utils.h> #include "glusterfs/compat-errno.h" // for ENODATA on BSD +#include <glusterfs/common-utils.h> #include <sys/time.h> #include <libgen.h> @@ -43,15 +44,6 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, static int dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); -char *xattrs_to_heal[] = {"user.", - POSIX_ACL_ACCESS_XATTR, - POSIX_ACL_DEFAULT_XATTR, - QUOTA_LIMIT_KEY, - QUOTA_LIMIT_OBJECTS_KEY, - GF_SELINUX_XATTR_KEY, - GF_XATTR_MDATA_KEY, - NULL}; - static const char *dht_dbg_vxattrs[] = 
{DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; /* Check the xdata to make sure EBADF has been set by client xlator */ @@ -84,6 +76,8 @@ dht_set_fixed_dir_stat(struct iatt *stat) static gf_boolean_t dht_match_xattr(const char *key) { + char **xattrs_to_heal = get_xattrs_to_heal(); + return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0; } @@ -388,7 +382,7 @@ out: /* Code to save hashed subvol on inode ctx as a mds subvol */ -static int +int dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol) { dht_inode_ctx_t *ctx = NULL; @@ -619,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) if (local->need_xattr_heal && !heal_path) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, ret, + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DIR_XATTR_HEAL_FAILED, "xattr heal failed for " "directory gfid is %s ", gfid_local); + } } } @@ -695,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; dht_conf_t *conf = 0; dht_layout_t *layout = NULL; + int32_t mds_heal_fresh_lookup = 0; GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, frame->local, out); @@ -702,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; conf = this->private; layout = local->selfheal.layout; + mds_heal_fresh_lookup = local->mds_heal_fresh_lookup; if (op_ret) { gf_msg_debug(this->name, op_ret, @@ -722,7 +719,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, layout); } out: - if (local && local->mds_heal_fresh_lookup) + if (mds_heal_fresh_lookup) DHT_STACK_DESTROY(frame); return 0; } @@ -1256,7 +1253,7 @@ err: to non hashed subvol */ int -dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int 
*op_errno) { dht_local_t *copy_local = NULL; call_frame_t *copy = NULL; @@ -1268,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "No gfid exists for path %s " "so healing xattr is not possible", local->loc.path); + *op_errno = EIO; goto out; } @@ -1281,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Memory allocation failed " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } else { copy_local->stbuf = local->stbuf; @@ -1295,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Synctask creation failed to heal xattr " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } } @@ -1435,15 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_aggregate_xattr(local->xattr, xattr); } + if (__is_root_gfid(stbuf->ia_gfid)) { + ret = dht_dir_has_layout(xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->prebuf.ia_ctime, + local->prebuf.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { + /* Choose source */ + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + + local->prebuf.ia_ctime = stbuf->ia_ctime; + local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; + local->prebuf.ia_prot = stbuf->ia_prot; + } + } + } + if (local->stbuf.ia_type != IA_INVAL) { /* This is not the first subvol to respond * Compare values to see if attrs need to be healed */ - if (!__is_root_gfid(stbuf->ia_gfid) && - ((local->stbuf.ia_gid != stbuf->ia_gid) || - (local->stbuf.ia_uid != stbuf->ia_uid) || - (is_permission_different(&local->stbuf.ia_prot, - &stbuf->ia_prot)))) { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || + (is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot))) { local->need_attrheal = 1; } } @@ -1635,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, 
uint32_t vol_commit_hash = 0; xlator_t *subvol = NULL; int32_t check_mds = 0; - int errst = 0; + int errst = 0, i = 0; int32_t mds_xattr_val[1] = {0}; GF_VALIDATE_OR_GOTO("dht", frame, err); @@ -1702,6 +1718,14 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->need_lookup_everywhere = 1; } else if (IA_ISDIR(local->loc.inode->ia_type)) { + layout = local->layout; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == prev) { + layout->list[i].err = op_errno; + break; + } + } + local->need_selfheal = 1; } } @@ -2137,31 +2161,18 @@ static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } static int32_t @@ -4290,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy = gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4580,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid 
>= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -5430,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int call_cnt = 0; dht_local_t *local = NULL; char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char **xattrs_to_heal; conf = this->private; local = frame->local; call_cnt = conf->subvolume_cnt; local->flags = flags; + xattrs_to_heal = get_xattrs_to_heal(); if (!gf_uuid_is_null(local->gfid)) { gf_uuid_unparse(local->gfid, gfid_local); @@ -5867,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -6694,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. 
This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -10824,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...) int had_heard_from_all = 0; int have_heard_from_all = 0; - struct timeval time = { - 0, - }; gf_defrag_info_t *defrag = NULL; dict_t *dict = NULL; gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -10870,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...) break; } - gettimeofday(&time, NULL); LOCK(&conf->subvolume_lock); { conf->subvolume_status[cnt] = 1; conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = time.tv_sec; + conf->subvol_up_time[cnt] = gf_time(); } UNLOCK(&conf->subvolume_lock); @@ -10983,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
if (defrag->is_exiting) goto unlock; if ((cmd == GF_DEFRAG_CMD_STATUS) || - (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) gf_defrag_status_get(conf, output); - else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - gf_defrag_start_detach_tier(defrag); else if (cmd == GF_DEFRAG_CMD_DETACH_START) defrag->cmd = GF_DEFRAG_CMD_DETACH_START; else if (cmd == GF_DEFRAG_CMD_STOP || - cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || cmd == GF_DEFRAG_CMD_DETACH_STOP) gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output); - else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) - ret = gf_defrag_pause_tier(this, defrag); - else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) - ret = gf_defrag_resume_tier(this, defrag); } unlock: UNLOCK(&defrag->lock); @@ -11072,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. */ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -11225,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. 
It is valid only for for ops on files @@ -11281,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { @@ -11428,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); return 0; } + +/* The job of this function is to check if all the xlators have updated + * error in the layout. */ +int +dht_dir_layout_error_check(xlator_t *this, inode_t *inode) +{ + dht_layout_t *layout = NULL; + int i = 0; + + layout = dht_layout_get(this, inode); + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == 0) { + return 0; + } + } + + /* Returning the first xlator error as all xlators have errors */ + return layout->list[0].err; +} diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 871b7aed9b3..fe0dc3db34a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,7 +24,6 @@ #define _DHT_H #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" -#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" #define DHT_MDS_STR "mds" #define GF_DHT_LOOKUP_UNHASHED_OFF 0 @@ -36,7 +35,6 @@ #define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal" /* Namespace synchronization */ #define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync" -#define TIERING_MIGRATION_KEY "tiering.migration" #define DHT_LAYOUT_HASH_INVALID 1 #define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN) @@ -52,10 +50,6 @@ #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" #define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol." 
-/* Array to hold custom xattr keys - */ -extern char *xattrs_to_heal[]; - /* Rebalance nodeuuid flags */ #define REBAL_NODEUUID_MINE 0x01 @@ -246,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem, dht_layout_t **ondisk); -typedef struct { - uint64_t blocks_used; - uint64_t pblocks_used; - uint64_t files_used; - uint64_t pfiles_used; - uint64_t unhashed_blocks_used; - uint64_t unhashed_pblocks_used; - uint64_t unhashed_files_used; - uint64_t unhashed_pfiles_used; - uint64_t unhashed_fsid; - uint64_t hashed_fsid; -} tier_statvfs_t; - struct dht_local { loc_t loc; loc_t loc2; @@ -276,7 +257,6 @@ struct dht_local { struct iatt preparent; struct iatt postparent; struct statvfs statvfs; - tier_statvfs_t tier_statvfs; fd_t *fd; inode_t *inode; dict_t *params; @@ -409,14 +389,7 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS = 1 + 2, GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, GF_DEFRAG_CMD_START_FORCE = 1 + 4, - GF_DEFRAG_CMD_START_TIER = 1 + 5, - GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, - GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, - GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, - GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11, - GF_DEFRAG_CMD_STOP_TIER = 1 + 12, GF_DEFRAG_CMD_DETACH_START = 1 + 13, GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14, GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15, @@ -467,75 +440,6 @@ struct dht_container { int local_subvol_index; }; -typedef enum tier_mode_ { - TIER_MODE_NONE = 0, - TIER_MODE_TEST, - TIER_MODE_WM -} tier_mode_t; - -typedef enum tier_pause_state_ { - TIER_RUNNING = 0, - TIER_REQUEST_PAUSE, - TIER_PAUSED -} tier_pause_state_t; - -/* This Structure is only used in tiering fixlayout */ -typedef struct gf_tier_fix_layout_arg { - xlator_t *this; - dict_t *fix_layout; - pthread_t thread_id; -} gf_tier_fix_layout_arg_t; - -typedef struct gf_tier_conf { - int is_tier; - int watermark_hi; - int watermark_low; - int watermark_last; - unsigned long block_size; - 
fsblkcnt_t blocks_total; - fsblkcnt_t blocks_used; - uint64_t max_migrate_bytes; - int max_migrate_files; - int query_limit; - tier_mode_t mode; - int percent_full; - /* These flags are only used for tier-compact */ - gf_boolean_t compact_active; - /* These 3 flags are set to true when the client changes the */ - /* compaction mode on the command line. */ - /* When they are set, the daemon will trigger compaction as */ - /* soon as possible to activate or deactivate compaction. */ - /* If in the middle of a compaction, then the switches take */ - /* effect on the next compaction, not the current one. */ - /* If the user switches it off, we want to avoid needless */ - /* compactions. */ - /* If the user switches it on, they want to compact as soon */ - /* as possible. */ - gf_boolean_t compact_mode_switched; - gf_boolean_t compact_mode_switched_hot; - gf_boolean_t compact_mode_switched_cold; - int tier_max_promote_size; - int tier_promote_frequency; - int tier_demote_frequency; - int tier_compact_hot_frequency; - int tier_compact_cold_frequency; - uint64_t st_last_promoted_size; - uint64_t st_last_demoted_size; - struct synctask *pause_synctask; - gf_timer_t *pause_timer; - pthread_mutex_t pause_mutex; - int promote_in_progress; - int demote_in_progress; - /* This Structure is only used in tiering fixlayout */ - gf_tier_fix_layout_arg_t tier_fix_layout_arg; - /* Indicates the index of the first queryfile picked - * in the last cycle of promote or demote */ - int32_t last_promote_qfile_index; - int32_t last_demote_qfile_index; - tier_pause_state_t pause_state; - char volname[GD_VOLUME_NAME_MAX + 1]; -} gf_tier_conf_t; - typedef struct nodeuuid_info { char info; /* Set to 1 is this is my node's uuid*/ uuid_t uuid; /* Store the nodeuuid as well for debugging*/ @@ -563,17 +467,10 @@ struct gf_defrag_info_ { int cmd; inode_t *root_inode; uuid_t node_uuid; - struct timeval start_time; + time_t start_time; uint32_t new_commit_hash; gf_defrag_status_t defrag_status; 
gf_defrag_pattern_list_t *defrag_pattern; - gf_tier_conf_t tier_conf; - - /*Data Tiering params for scanner*/ - uint64_t total_files_promoted; - uint64_t total_files_demoted; - int write_freq_threshold; - int read_freq_threshold; pthread_cond_t parallel_migration_cond; pthread_mutex_t dfq_mutex; @@ -609,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t; struct dht_methods_s { int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local); int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag); - int32_t (*migration_needed)(xlator_t *this); xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout, const char *name); }; @@ -630,7 +526,7 @@ struct dht_conf { int subvolume_cnt; int32_t refresh_interval; gf_lock_t subvolume_lock; - struct timeval last_stat_fetch; + time_t last_stat_fetch; gf_lock_t layout_lock; dict_t *leaf_to_subvol; void *private; /* Can be used by wrapper xlators over @@ -752,6 +648,8 @@ struct dir_dfmeta { struct list_head **head; struct list_head **iterator; int *fetch_entries; + /* fds corresponding to local subvols only */ + fd_t **lfd; }; typedef struct dht_migrate_info { @@ -1238,24 +1136,6 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); -void -gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state); - -tier_pause_state_t -gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf); - -int -gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag); - -tier_pause_state_t -gf_defrag_check_pause_tier(gf_tier_conf_t *defrag); - -int -gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag); - -int -gf_defrag_start_detach_tier(gf_defrag_info_t *defrag); - int gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output); @@ -1336,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout); int dht_refresh_layout(call_frame_t *frame); -gf_boolean_t -dht_is_tier_xlator(xlator_t *this); 
- int dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); @@ -1451,7 +1328,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, dict_t *src, int *uret, int *uflag); int -dht_dir_xattr_heal(xlator_t *this, dht_local_t *local); +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno); int dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag); @@ -1499,4 +1376,9 @@ dht_create_lock(call_frame_t *frame, xlator_t *subvol); int dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); +int +dht_dir_layout_error_check(xlator_t *this, inode_t *inode); + +int +dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol); #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 27097ca2475..c0588828fdb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) dht_conf_t *conf = NULL; call_frame_t *statfs_frame = NULL; dht_local_t *statfs_local = NULL; - struct timeval tv = { - 0, - }; loc_t tmp_loc = { 0, }; + time_t now; conf = this->private; - - gettimeofday(&tv, NULL); - + now = gf_time(); /* make it root gfid, should be enough to get the proper info back */ tmp_loc.gfid[15] = 1; - if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) { + if (now > (conf->refresh_interval + conf->last_stat_fetch)) { statfs_frame = copy_frame(frame); if (!statfs_frame) { goto err; @@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) statfs_local->params); } - conf->last_stat_fetch.tv_sec = tv.tv_sec; + conf->last_stat_fetch = now; } return 0; err: diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 73a89399efd..3f2fe43d5f3 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ 
b/xlators/cluster/dht/src/dht-helper.c @@ -2083,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) dht_local_t *local = NULL; xlator_t *this = NULL; int ret = -1; + int op_errno = 0; local = heal_frame->local; main_frame = local->main_frame; @@ -2092,11 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) dht_set_fixed_dir_stat(&local->postparent); if (local->need_xattr_heal) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_smsg(this->name, GF_LOG_ERROR, ret, + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path, NULL); + } } DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf, @@ -2265,6 +2267,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, int luret = -1; int luflag = -1; int i = 0; + char **xattrs_to_heal; if (!src || !dst) { gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED, @@ -2279,6 +2282,9 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, and set it to dst dict, here index start from 1 because user xattr already checked in previous statement */ + + xattrs_to_heal = get_xattrs_to_heal(); + for (i = 1; xattrs_to_heal[i]; i++) { keyval = dict_get(src, xattrs_to_heal[i]); if (keyval) { diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index eda2491e0ff..2f23ce90fbd 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1(postbuf)) { - if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new(); - if 
(!local->xattr_req) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32(local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "insufficient memory"); local->op_errno = ENOMEM; local->op_ret = -1; goto out; } } + ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + dht_iatt_merge(this, &local->stbuf, postbuf); dht_iatt_merge(this, &local->prebuf, prebuf); diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 33f9832395b..e3c4471334a 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -30,10 +30,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, - gf_tier_mt_bricklist_t, - gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, - gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, gf_dht_nodeuuids_t, gf_dht_mt_end diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 026879e14af..601f8dad78b 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -38,12 +38,11 @@ GLFS_MSGID( DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED, DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES, DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED, - DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR, - DHT_MSG_LOG_TIER_STATUS, 
DHT_MSG_GET_XATTR_FAILED, - DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED, - DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED, - DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO, - DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, + DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, + DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED, + DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED, + DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, + DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED, DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED, DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED, @@ -69,8 +68,7 @@ GLFS_MSGID( DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED, DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR, DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO, - DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR, - DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED, + DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED, DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED, DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED, DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED, @@ -96,15 +94,13 @@ GLFS_MSGID( DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED, DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL, DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED, - DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, - DHT_MSG_TIER_FIX_LAYOUT_STARTED, DHT_MSG_FIX_NOT_COMP, - DHT_MSG_REMOVE_TIER_FAILED, DHT_MSG_SUBVOL_DETER_FAILED, - DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, DHT_MSG_SIZE_FILE, - DHT_MSG_GET_DATA_SIZE_FAILED, DHT_MSG_PTHREAD_JOIN_FAILED, - DHT_MSG_COUNTER_THREAD_CREATE_FAILED, DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, - 
DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, DHT_MSG_ABORT_REBALANCE, - DHT_MSG_CREATE_TASK_REBAL_FAILED, DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, - DHT_MSG_MIG_TIER_PAUSED, DHT_MSG_ADD_CHOICES_ERROR, + DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP, + DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, + DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED, + DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED, + DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, + DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED, + DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR, DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR, DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED, DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED, @@ -180,7 +176,6 @@ GLFS_MSGID( "adding bricks" #define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file" #define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory" -#define DHT_MSG_MIG_TIER_PAUSED_STR "Migrate file paused" #define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr" #define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode" #define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination" @@ -222,17 +217,14 @@ GLFS_MSGID( #define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present" #define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed" #define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup" -#define DHT_MSG_LOG_TIER_STATUS_STR "lookup to cold tier on attach heal failed" #define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed" #define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. 
Skipping" #define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout" #define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed" #define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed" -#define DHT_MSG_TIER_FIX_LAYOUT_STARTED_STR "Tiering fix layout started" #define DHT_MSG_FIX_NOT_COMP_STR \ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \ "complete" -#define DHT_MSG_REMOVE_TIER_FAILED_STR "Failed removing tier fix layout xattr" #define DHT_MSG_SUBVOL_DETER_FAILED_STR \ "local subvolume determination failed with error" #define DHT_MSG_LOCAL_SUBVOL_STR "local subvol" @@ -248,8 +240,6 @@ GLFS_MSGID( #define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \ "Failed to initialise migration queue" #define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance" -#define DHT_MSG_TIER_RESUME_STR "Pause end. Resume tiering" -#define DHT_MSG_TIER_PAUSED_STR "Pause tiering" #define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout" #define DHT_MSG_WOKE_STR "woken" #define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance" diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index abe2afae5dc..8ba8082bd86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -16,8 +16,8 @@ #include "glusterfs/compat-errno.h" // for ENODATA on BSD #define GF_DISK_SECTOR_SIZE 512 -#define DHT_REBALANCE_PID 4242 /* Change it if required */ -#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ +#define DHT_REBALANCE_PID 4242 /* Change it if required */ +#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */ #define MAX_MIGRATE_QUEUE_COUNT 500 #define MIN_MIGRATE_QUEUE_COUNT 200 #define MAX_REBAL_TYPE_SIZE 16 @@ -45,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) if (meta) { for (i = 0; i < local_subvols_cnt; i++) { - gf_dirent_free(&meta->equeue[i]); + if (meta->equeue) + gf_dirent_free(&meta->equeue[i]); + if 
(meta->lfd && meta->lfd[i]) + fd_unref(meta->lfd[i]); } GF_FREE(meta->equeue); @@ -53,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) GF_FREE(meta->iterator); GF_FREE(meta->offset_var); GF_FREE(meta->fetch_entries); + GF_FREE(meta->lfd); GF_FREE(meta); } } @@ -84,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret) return; } -static gf_boolean_t -dht_is_tier_command(int cmd) -{ - gf_boolean_t is_tier = _gf_false; - - switch (cmd) { - case GF_DEFRAG_CMD_START_TIER: - case GF_DEFRAG_CMD_STATUS_TIER: - case GF_DEFRAG_CMD_START_DETACH_TIER: - case GF_DEFRAG_CMD_STOP_DETACH_TIER: - case GF_DEFRAG_CMD_PAUSE_TIER: - case GF_DEFRAG_CMD_RESUME_TIER: - is_tier = _gf_true; - break; - default: - break; - } - return is_tier; -} - static int dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) { @@ -112,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) char *tmpstr = NULL; char *ptr = NULL; char *suffix = "-dht"; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; int len = 0; eventtypes_t event = EVENT_LAST; @@ -132,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) break; } - if (dht_is_tier_command(cmd)) { - /* We should have the tier volume name*/ - conf = this->private; - defrag = conf->defrag; - volname = defrag->tier_conf.volname; - } else { - /* DHT volume */ - len = strlen(this->name) - strlen(suffix); - tmpstr = gf_strdup(this->name); - if (tmpstr) { - ptr = tmpstr + len; - if (!strcmp(ptr, suffix)) { - tmpstr[len] = '\0'; - volname = tmpstr; - } + /* DHT volume */ + len = strlen(this->name) - strlen(suffix); + tmpstr = gf_strdup(this->name); + if (tmpstr) { + ptr = tmpstr + len; + if (!strcmp(ptr, suffix)) { + tmpstr[len] = '\0'; + volname = tmpstr; } } @@ -172,75 +147,6 @@ dht_strip_out_acls(dict_t *dict) } } -static int -dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, - 
int32_t size, off_t offset, struct iobref *iobref, - int *fop_errno) -{ - int i = 0; - int ret = -1; - int start_idx = 0; - int tmp_offset = 0; - int write_needed = 0; - int buf_len = 0; - int size_pending = 0; - char *buf = NULL; - - /* loop through each vector */ - for (i = 0; i < count; i++) { - buf = vec[i].iov_base; - buf_len = vec[i].iov_len; - - for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; - start_idx += GF_DISK_SECTOR_SIZE) { - if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { - write_needed = 1; - continue; - } - - if (write_needed) { - ret = syncop_write( - to, fd, (buf + tmp_offset), (start_idx - tmp_offset), - (offset + tmp_offset), iobref, 0, NULL, NULL); - /* 'path' will be logged in calling function */ - if (ret < 0) { - gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror(-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - - write_needed = 0; - } - tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; - } - - if ((start_idx < buf_len) || write_needed) { - /* This means, last chunk is not yet written.. 
write it */ - ret = syncop_write(to, fd, (buf + tmp_offset), - (buf_len - tmp_offset), (offset + tmp_offset), - iobref, 0, NULL, NULL); - if (ret < 0) { - /* 'path' will be logged in calling function */ - gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror(-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - } - - size_pending = (size - buf_len); - if (!size_pending) - break; - } - - ret = size; -out: - return ret; -} - /* return values: -1 : failure @@ -648,7 +554,7 @@ out: static int __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, - int *fop_errno) + int *fop_errno, int file_has_holes) { int ret = -1; int ret2 = -1; @@ -703,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - if (!!dht_is_tier_xlator(this)) { - xdata = dict_new(); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", - loc->path); - goto out; - } + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed)", loc->path); + goto out; + } - ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", loc->path, - GF_CLEAN_WRITE_PROTECTION); - goto out; - } + ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; } ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); @@ -817,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, 
/* No need to bother about 0 byte size files */ if (stbuf->ia_size > 0) { - if (conf->use_fallocate) { + if (conf->use_fallocate && !file_has_holes) { ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL); if (ret < 0) { if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) { @@ -844,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } } - } - - if (!conf->use_fallocate) { + } else { ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL, NULL); if (ret < 0) { @@ -1097,22 +998,90 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, int ret = 0; int count = 0; off_t offset = 0; + off_t data_offset = 0; + off_t hole_offset = 0; struct iovec *vector = NULL; struct iobref *iobref = NULL; uint64_t total = 0; size_t read_size = 0; + size_t data_block_size = 0; dict_t *xdata = NULL; dht_conf_t *conf = NULL; conf = this->private; + /* if file size is '0', no need to enter this loop */ while (total < ia_size) { - read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) - ? DHT_REBALANCE_BLKSIZE - : (ia_size - total)); + /* This is a regular file - read it sequentially */ + if (!hole_exists) { + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) + ? 
DHT_REBALANCE_BLKSIZE + : (ia_size - total)); + } else { + /* This is a sparse file - read only the data segments in the file + */ + + /* If the previous data block is fully copied, find the next data + * segment + * starting at the offset of the last read and written byte, */ + if (data_block_size <= 0) { + ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL, + &data_offset); + if (ret) { + if (ret == -ENXIO) + ret = 0; /* No more data segments */ + else + *fop_errno = -ret; /* Error occurred */ + + break; + } + + /* If the position of the current data segment is greater than + * the position of the next hole, find the next hole in order to + * calculate the length of the new data segment */ + if (data_offset > hole_offset) { + /* Starting at the offset of the last data segment, find the + * next hole */ + ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE, + NULL, &hole_offset); + if (ret) { + /* If an error occurred here it's a real error because + * if the seek for a data segment was successful then + * necessarily another hole must exist (EOF is a hole) + */ + *fop_errno = -ret; + break; + } + + /* Calculate the total size of the current data block */ + data_block_size = hole_offset - data_offset; + } + } else { + /* There is still data in the current segment, move the + * data_offset to the position of the last written byte */ + data_offset = offset; + } + + /* Calculate how much data needs to be read and written. If the data + * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and + * write DHT_REBALANCE_BLKSIZE data length and the rest in the + * next iteration(s) */ + read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) + ? 
DHT_REBALANCE_BLKSIZE + : data_block_size); + + /* Calculate the remaining size of the data block - maybe there's no + * need to seek for data in the next iteration */ + data_block_size -= read_size; + + /* Set offset to the offset of the data segment so read and write + * will have the correct position */ + offset = data_offset; + } ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, &iobref, NULL, NULL, NULL); + if (!ret || (ret < 0)) { if (!ret) { /* File was probably truncated*/ @@ -1124,57 +1093,42 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, break; } - if (hole_exists) { - ret = dht_write_with_holes(to, dst, vector, count, ret, offset, - iobref, fop_errno); - } else { - if (!conf->force_migration && !dht_is_tier_xlator(this)) { + if (!conf->force_migration) { + if (!xdata) { + xdata = dict_new(); if (!xdata) { - xdata = dict_new(); - if (!xdata) { - gf_msg("dht", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "insufficient memory"); - ret = -1; - *fop_errno = ENOMEM; - break; - } + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "insufficient memory"); + ret = -1; + *fop_errno = ENOMEM; + break; + } - /* Fail this write and abort rebalance if we - * detect a write from client since migration of - * this file started. This is done to avoid - * potential data corruption due to out of order - * writes from rebalance and client to the same - * region (as compared between src and dst - * files). See - * https://github.com/gluster/glusterfs/issues/308 - * for more details. - */ - ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); - if (ret) { - gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, - "failed to set dict"); - ret = -1; - *fop_errno = ENOMEM; - break; - } + /* Fail this write and abort rebalance if we + * detect a write from client since migration of + * this file started. 
This is done to avoid + * potential data corruption due to out of order + * writes from rebalance and client to the same + * region (as compared between src and dst + * files). See + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ + ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); + ret = -1; + *fop_errno = ENOMEM; + break; } } - ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, - NULL, xdata, NULL); - if (ret < 0) { - *fop_errno = -ret; - } - } - - if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && - (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { - gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, - "Migrate file paused"); - ret = -1; } + ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, + NULL, xdata, NULL); if (ret < 0) { + *fop_errno = -ret; break; } @@ -1568,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, xlator_t *old_target = NULL; xlator_t *hashed_subvol = NULL; fd_t *linkto_fd = NULL; + dict_t *xdata = NULL; if (from == to) { gf_msg_debug(this->name, 0, @@ -1578,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* If defrag is NULL, it should be assumed that migration is triggered - * from client using the trusted.distribute.migrate-data virtual xattr - */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator(this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - gf_log(this->name, log_level, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -1739,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* Try to preserve 'holes' while migrating data */ + 
if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) + file_has_holes = 1; + /* create the destination, with required modes/xattr */ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, - fop_errno); + fop_errno, file_has_holes); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" @@ -1785,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, * destination. We need to do update this only post migration * as in case of failure the linkto needs to point to the source * subvol */ - ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, - &dst_fd, fop_errno); + ret = __dht_rebalance_create_dst_file( + this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes); if (ret) { gf_log(this->name, GF_LOG_ERROR, "Create dst failed" @@ -1873,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = 0; goto out; } - /* Try to preserve 'holes' while migrating data */ - if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) - file_has_holes = 1; ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, stbuf.ia_size, file_has_holes, @@ -1890,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* TODO: Sync the locks */ - ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); + xdata = dict_new(); + if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set last-fsync flag on " + "%s (%s)", + loc->path, to->name, strerror(ENOMEM)); + } + + ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); if (ret) { gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", loc->path, to->name, strerror(-ret)); @@ -2333,14 +2282,12 @@ out: } } - if (!dht_is_tier_xlator(this)) { - lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - 
gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, + NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } if (dict) @@ -2353,11 +2300,15 @@ out: if (dst_fd) syncop_close(dst_fd); + if (src_fd) syncop_close(src_fd); if (linkto_fd) syncop_close(linkto_fd); + if (xdata) + dict_unref(xdata); + loc_wipe(&tmp_loc); loc_wipe(&parent_loc); @@ -2587,10 +2538,10 @@ out: * all hardlinks. */ -int +gf_boolean_t gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) { - int ret = 0; + gf_boolean_t ret = _gf_false; int i = local_subvol_index; char *str = NULL; uint32_t hashval = 0; @@ -2612,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) } str = uuid_utoa_r(gfid, buf); - ret = dht_hash_compute(this, 0, str, &hashval); - if (ret == 0) { + if (dht_hash_compute(this, 0, str, &hashval) == 0) { index = (hashval % entry->count); if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } @@ -2630,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) /* None of the bricks in the subvol are up. 
* CHILD_DOWN will kill the process soon */ - return 0; + return _gf_false; } if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } } @@ -2684,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque) struct iatt *iatt_ptr = NULL; gf_boolean_t update_skippedcount = _gf_true; int i = 0; + gf_boolean_t should_i_migrate = 0; rebal_entry = (struct dht_container *)opaque; if (!rebal_entry) { @@ -2738,11 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } + should_i_migrate = gf_defrag_should_i_migrate( + this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid); + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); gf_uuid_copy(entry_loc.pargfid, loc->gfid); ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + + if (!should_i_migrate) { + /* this node isn't supposed to migrate the file. suppressing any + * potential error from lookup as this file is under migration by + * another node */ + if (ret) { + gf_msg_debug(this->name, -ret, + "Ignoring lookup failure: node isn't migrating %s", + entry_loc.path); + ret = 0; + } + gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); + goto out; + } + if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, "Migrate file failed: %s lookup failed", entry_loc.path); @@ -2763,12 +2732,6 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } - if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, - entry->d_stat.ia_gfid)) { - gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); - goto out; - } - iatt_ptr = &iatt; hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); @@ -2911,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque) if (defrag->stats == _gf_true) { gettimeofday(&end, NULL); - elapsed = (end.tv_sec - start.tv_sec) * 1e6 + - (end.tv_usec - start.tv_usec); + elapsed = gf_tvdiff(&start, &end); gf_log(this->name, GF_LOG_INFO, "Migration 
of " "file:%s size:%" PRIu64 @@ -3091,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i, dht_conf_t *conf, gf_defrag_info_t *defrag, fd_t *fd, dict_t *migrate_data, struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) + int *perrno) { - int ret = -1; + int ret = 0; char is_linkfile = 0; gf_dirent_t *df_entry = NULL; struct dht_container *tmp_container = NULL; @@ -3109,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i, } if (dir_dfmeta->fetch_entries[i] == 1) { + if (!fd) { + dir_dfmeta->fetch_entries[i] = 0; + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; + } + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, dir_dfmeta->offset_var[i].offset, &(dir_dfmeta->equeue[i]), xattr_req, NULL); @@ -3268,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *migrate_data, int *perrno) { int ret = -1; - fd_t *fd = NULL; dht_conf_t *conf = NULL; gf_dirent_t entries; dict_t *xattr_req = NULL; @@ -3289,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int dfc_index = 0; int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; + xlator_t *old_THIS = NULL; gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); gettimeofday(&dir_start, NULL); @@ -3302,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - fd = fd_create(loc->inode, defrag->pid); - if (!fd) { - gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + old_THIS = THIS; + THIS = this; + + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); + if (!dir_dfmeta) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); ret = -1; goto out; } - ret = syncop_opendir(this, loc, fd, NULL, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, - "Migrate data failed: Failed to open dir %s", 
loc->path); - *perrno = -ret; + dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *), + gf_common_mt_pointer); + if (!dir_dfmeta->lfd) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY, + "for dir_dfmeta", NULL); ret = -1; + *perrno = ENOMEM; goto out; } - fd_bind(fd); - dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); - if (!dir_dfmeta) { - gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); - ret = -1; - goto out; + for (i = 0; i < local_subvols_cnt; i++) { + dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid); + if (!dir_dfmeta->lfd[i]) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED, + NULL); + *perrno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i], + NULL, NULL); + if (ret) { + fd_unref(dir_dfmeta->lfd[i]); + dir_dfmeta->lfd[i] = NULL; + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN, + "dir: %s", loc->path, "subvol: %s", + conf->local_subvols[i]->name, NULL); + + if (conf->decommission_in_progress) { + *perrno = -ret; + ret = -1; + goto out; + } + } else { + fd_bind(dir_dfmeta->lfd[i]); + } } dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)), @@ -3358,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } + ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this); if (ret) { gf_log(this->name, GF_LOG_ERROR, @@ -3370,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int), gf_common_mt_int); if (!dir_dfmeta->fetch_entries) { - gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY, + "for dir_dfmeta->fetch_entries", NULL); ret = -1; goto out; } @@ -3440,8 +3435,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t 
*loc, ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, - defrag, fd, migrate_data, dir_dfmeta, - xattr_req, &should_commit_hash, perrno); + defrag, dir_dfmeta->lfd[dfc_index], + migrate_data, dir_dfmeta, xattr_req, + perrno); if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { goto out; @@ -3485,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } gettimeofday(&end, NULL); - elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + - (end.tv_usec - dir_start.tv_usec); + elapsed = gf_tvdiff(&dir_start, &end); gf_log(this->name, GF_LOG_INFO, "Migration operation on dir %s took " "%.2f secs", loc->path, elapsed / 1e6); ret = 0; out: - + THIS = old_THIS; gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); if (xattr_req) dict_unref(xattr_req); - if (fd) - fd_unref(fd); - - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - /* It does not matter if it errored out - this number is * used to calculate rebalance estimated time to complete. * No locking required as dirs are processed by a single thread. @@ -3513,6 +3501,7 @@ out: defrag->num_dirs_processed++; return ret; } + int gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout) @@ -3527,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, * rebalance is complete. */ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || - defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { return 0; } @@ -3573,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, return 0; } -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. 
This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. This is not performant, adding more time to - * fixlayout. The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ -int -gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, - gf_dirent_t *file_dentry) -{ - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - - GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); - goto out; - } - - conf = this->private; - - loc_wipe(&file_loc); - - if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null(parent_loc->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", - parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - - ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new(); - if (!lookup_xdata) { - 
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", file_dentry->d_name); - goto out; - } - - ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; - } - - gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, - lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; - -out: - - loc_wipe(&file_loc); - - if (lookup_xdata) - dict_unref(lookup_xdata); - - return ret; -} - int gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3700,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; - int should_commit_hash = 1; int perrno = 0; conf = this->private; @@ -3803,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; if (!IA_ISDIR(entry->d_stat.ia_type)) { - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. 
- * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup(this, loc, entry); - } - continue; } loc_wipe(&entry_loc); @@ -3829,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } else { - should_commit_hash = 0; - continue; } } @@ -3893,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } else { - should_commit_hash = 0; continue; } } @@ -3906,11 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, migrate_data); - if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED || + defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) { goto out; } - if (ret && ret != 2) { + if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, "Fix layout failed for %s", entry_loc.path); @@ -3941,6 +3808,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, */ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + + /* In case of a race where the directory is deleted just before + * layout setxattr, the errors are updated in the layout structure. + * We can use this information to make a decision whether the directory + * is deleted entirely. 
+ */ + if (ret == 0) { + ret = dht_dir_layout_error_check(this, loc->inode); + ret = -ret; + } + if (ret) { if (-ret == ENOENT || -ret == ESTALE) { gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, @@ -3966,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - if (ret && (ret != 2)) { + if (ret) { if (perrno == ENOENT || perrno == ESTALE) { ret = 0; goto out; @@ -3986,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (conf->decommission_in_progress) { goto out; } - - should_commit_hash = 0; } - } else if (ret == 2) { - should_commit_hash = 0; } } gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { defrag->total_failures++; gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, @@ -4021,245 +3893,34 @@ out: if (fd) fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - return ret; } -/****************************************************************************** - * Tier background Fix layout functions - ******************************************************************************/ -/* This is the background tier fixlayout thread */ -void * -gf_tier_do_fix_layout(void *args) -{ - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; - int ret = -1; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *dict = NULL; - loc_t loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - struct iatt parent = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out); - GF_VALIDATE_OR_GOTO("tier", 
tier_fix_layout_arg->this, out); - this = tier_fix_layout_arg->this; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out); - - GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out); - - /* Get Root loc_t */ - dht_build_root_loc(defrag->root_inode, &loc); - ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, - "Lookup on root failed."); - ret = -1; - goto out; - } - - /* Start the crawl */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Tiering Fixlayout started"); - - ret = gf_defrag_fix_layout(this, defrag, &loc, - tier_fix_layout_arg->fix_layout, NULL); - if (ret && ret != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Tiering fixlayout failed."); - ret = -1; - goto out; - } - - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, - tier_fix_layout_arg->fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - dict = dict_new(); - if (!dict) { - ret = -1; - goto out; - } - - ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Failed to set dictionary value: key = %s", - GF_XATTR_TIER_LAYOUT_FIXED_KEY); - ret = -1; - goto out; - } - - /* Marking the completion of tiering fix layout via a xattr on root */ - ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, - "Failed to set tiering fix " - "layout completed xattr on %s", - loc.path); - ret = -1; - goto out; - } - - ret = 0; -out: - if (ret && defrag) - defrag->total_failures++; - - if (dict) - dict_unref(dict); - - return NULL; -} - -int -gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag, - dict_t *fix_layout) -{ - 
int ret = -1; - dict_t *tier_dict = NULL; - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; - - tier_dict = dict_new(); - if (!tier_dict) { - gf_log("tier", GF_LOG_ERROR, - "Tier fix layout failed :" - "Creation of tier_dict failed"); - ret = -1; - goto out; - } - - /* Check if layout is fixed already */ - ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret != 0) { - tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; - - /*Fill crawl arguments */ - tier_fix_layout_arg->this = this; - tier_fix_layout_arg->fix_layout = fix_layout; - - /* Spawn the fix layout thread so that its done in the - * background */ - ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL, - gf_tier_do_fix_layout, tier_fix_layout_arg, - "tierfixl"); - if (ret) { - gf_log("tier", GF_LOG_ERROR, - "Thread creation failed. " - "Background fix layout for tiering will not " - "work."); - defrag->total_failures++; - goto out; - } - } - ret = 0; -out: - if (tier_dict) - dict_unref(tier_dict); - - return ret; -} - -void -gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) -{ - int ret = -1; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO(this->name, loc, out); - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - - /* Check if background fixlayout is completed. This is not - * multi-process safe i.e there is a possibility that by the time - * we move to remove the xattr there it might have been cleared by some - * other detach process from other node. We ignore the error if such - * a thing happens */ - ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret) { - /* Background fixlayout not complete - nothing to clear*/ - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, - "Unable to retrieve fixlayout xattr." 
- "Assume background fix layout not complete"); - goto out; - } - - ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, - NULL); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, - "Failed removing tier fix layout " - "xattr from %s", - loc->path); - goto out; - } - ret = 0; -out: - if (dict) - dict_unref(dict); -} - -void -gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag) -{ - if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { - pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL); - } -} -/******************Tier background Fix layout functions END********************/ - int dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; uuid_t *uuid_ptr = NULL; int ret = -1; int i = 0; int j = 0; - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, - "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL, + NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } + if (!ret) + goto out; + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, NULL, NULL); if (ret) { @@ -4350,9 +4011,6 @@ dht_file_counter_thread(void *args) struct timespec time_to_wait = { 0, }; - struct timeval now = { - 0, - }; uint64_t tmp_size = 0; if (!args) @@ -4362,9 +4020,8 @@ dht_file_counter_thread(void *args) dht_build_root_loc(defrag->root_inode, &root_loc); while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { - 
gettimeofday(&now, NULL); - time_to_wait.tv_sec = now.tv_sec + 600; - time_to_wait.tv_nsec = 0; + timespec_now(&time_to_wait); + time_to_wait.tv_sec += 600; pthread_mutex_lock(&defrag->fc_mutex); pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex, @@ -4437,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) goto out; } - ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread, (void *)defrag, "dhtfcnt"); if (ret) { @@ -4494,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, /*Spawn Threads Here*/ while (index < thread_spawn_count) { - ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task, (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff); if (ret != 0) { gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ", @@ -4568,7 +4225,6 @@ gf_defrag_start_crawl(void *data) dict_t *migrate_data = NULL; dict_t *status = NULL; glusterfs_ctx_t *ctx = NULL; - dht_methods_t *methods = NULL; call_frame_t *statfs_frame = NULL; xlator_t *old_THIS = NULL; int ret = -1; @@ -4584,7 +4240,6 @@ gf_defrag_start_crawl(void *data) int thread_index = 0; pthread_t *tid = NULL; pthread_t filecnt_thread; - gf_boolean_t is_tier_detach = _gf_false; gf_boolean_t fc_thread_started = _gf_false; this = data; @@ -4603,7 +4258,8 @@ gf_defrag_start_crawl(void *data) if (!defrag) goto exit; - gettimeofday(&defrag->start_time, NULL); + defrag->start_time = gf_time(); + dht_build_root_inode(this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -4737,43 +4393,17 @@ gf_defrag_start_crawl(void *data) } } - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Fix layout for attach tier */ - ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout); - if (ret) { - goto out; - } - - methods = &(conf->methods); - - /* Calling tier_start of 
tier.c */ - methods->migration_other(this, defrag); - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, - "force"); - if (ret) - goto out; - } - } else { - ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } + ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data); + if (ret) { + defrag->total_failures++; + ret = -1; + goto out; + } - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) - is_tier_detach = _gf_true; + if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; } gf_log("DHT", GF_LOG_INFO, "crawling file-system completed"); @@ -4787,19 +4417,6 @@ out: defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; } - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Wait for the tier fixlayout to - * complete if its was started.*/ - gf_tier_wait_fix_lookup(defrag); - } - - if (is_tier_detach && ret == 0) { - /* If it was a detach remove the tier fix-layout - * xattr on root. 
Ignoring the failure, as nothing has to be - * done, logging is done in gf_tier_clear_fix_layout */ - gf_tier_clear_fix_layout(this, &loc, defrag); - } - gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index); if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && @@ -4898,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) uint64_t total_processed = 0; uint64_t tmp_count = 0; uint64_t time_to_complete = 0; - struct timeval now = { - 0, - }; double elapsed = 0; defrag = conf->defrag; @@ -4908,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) if (!g_totalsize) goto out; - gettimeofday(&now, NULL); - elapsed = now.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* Don't calculate the estimates for the first 10 minutes. * It is unlikely to be accurate and estimates are not required @@ -4959,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; char *status = ""; double elapsed = 0; - struct timeval end = { - 0, - }; uint64_t time_to_complete = 0; uint64_t time_left = 0; gf_defrag_info_t *defrag = conf->defrag; @@ -4982,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; - - gettimeofday(&end, NULL); - elapsed = end.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* The rebalance is still in progress */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { time_to_complete = gf_defrag_get_estimates_based_on_size(conf); if (time_to_complete && (time_to_complete > elapsed)) @@ -5007,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t 
*conf, dict_t *dict) if (!dict) goto log; - ret = dict_set_uint64(dict, "promoted", promoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); - - ret = dict_set_uint64(dict, "demoted", demoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); - ret = dict_set_uint64(dict, "files", files); if (ret) gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); @@ -5080,159 +4675,6 @@ out: return 0; } -void -gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state) -{ - pthread_mutex_lock(&tier_conf->pause_mutex); - tier_conf->pause_state = state; - pthread_mutex_unlock(&tier_conf->pause_mutex); -} - -tier_pause_state_t -gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf) -{ - int state; - - pthread_mutex_lock(&tier_conf->pause_mutex); - state = tier_conf->pause_state; - pthread_mutex_unlock(&tier_conf->pause_mutex); - - return state; -} - -tier_pause_state_t -gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf) -{ - int woke = 0; - int state = -1; - - pthread_mutex_lock(&tier_conf->pause_mutex); - - if (tier_conf->pause_state == TIER_RUNNING) - goto out; - - if (tier_conf->pause_state == TIER_PAUSED) - goto out; - - if (tier_conf->promote_in_progress || tier_conf->demote_in_progress) - goto out; - - tier_conf->pause_state = TIER_PAUSED; - - if (tier_conf->pause_synctask) { - synctask_wake(tier_conf->pause_synctask); - tier_conf->pause_synctask = 0; - woke = 1; - } - - gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke); - - gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); -out: - state = tier_conf->pause_state; - - pthread_mutex_unlock(&tier_conf->pause_mutex); - - return state; -} - -void -gf_defrag_pause_tier_timeout(void *data) -{ - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - - this = (xlator_t *)data; - GF_VALIDATE_OR_GOTO("tier", this, out); - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, 
conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Request pause timer timeout"); - - gf_defrag_check_pause_tier(&defrag->tier_conf); - -out: - return; -} - -int -gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) -{ - int ret = 0; - struct timespec delta = { - 0, - }; - int delay = 2; - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - goto out; - - /* - * Set flag requesting to pause tiering. Wait 'delay' seconds for - * tiering to actually stop as indicated by the pause state - * before returning success or failure. - */ - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); - - /* - * If migration is not underway, can pause immediately. - */ - gf_defrag_check_pause_tier(&defrag->tier_conf); - if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Request pause tier"); - - defrag->tier_conf.pause_synctask = synctask_get(); - delta.tv_sec = delay; - delta.tv_nsec = 0; - defrag->tier_conf.pause_timer = gf_timer_call_after( - this->ctx, delta, gf_defrag_pause_tier_timeout, this); - - synctask_yield(defrag->tier_conf.pause_synctask); - - if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - - ret = -1; -out: - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Pause tiering ret=%d", ret); - - return ret; -} - -int -gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag) -{ - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME, - "Pause end. 
Resume tiering"); - - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - - gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); - - return 0; -} - -int -gf_defrag_start_detach_tier(gf_defrag_info_t *defrag) -{ - defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; - - return 0; -} - int gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index eb0a853e81f..3e24065227c 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -1271,10 +1271,6 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, local->call_cnt = conf->subvolume_cnt; if (op_ret < 0) { - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. - */ if (op_errno == EINVAL) { local->call_cnt = 1; dht_selfheal_dir_mkdir_lookup_done(frame, this); @@ -1326,12 +1322,15 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, { int missing_dirs = 0; int i = 0; + int op_errno = 0; int ret = -1; dht_local_t *local = NULL; xlator_t *this = NULL; + dht_conf_t *conf = NULL; local = frame->local; this = frame->this; + conf = this->private; local->selfheal.force_mkdir = force; local->selfheal.hole_cnt = 0; @@ -1348,11 +1347,12 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, if (!__is_root_gfid(local->stbuf.ia_gfid)) { if (local->need_xattr_heal) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_smsg(this->name, GF_LOG_ERROR, ret, + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path, "gfid=%s", local->gfid, NULL); + } } else { if (!gf_uuid_is_null(local->gfid)) gf_uuid_copy(loc->gfid, local->gfid); @@ -1370,15 +1370,44 @@ 
dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, return 0; } - if (local->hashed_subvol == NULL) - local->hashed_subvol = dht_subvol_get_hashed(this, loc); + /* MDS xattr is populated only while DHT is having more than one + subvol.In case of graph switch while adding more dht subvols need to + consider hash subvol as a MDS to avoid MDS check failure at the time + of running fop on directory + */ + if (!dict_get(local->xattr, conf->mds_xattr_key) && + (conf->subvolume_cnt > 1)) { + if (local->hashed_subvol == NULL) { + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", + loc->pargfid, "name=%s", loc->name, "path=%s", + loc->path, NULL); + goto err; + } + } + ret = dht_inode_ctx_mdsvol_set(local->inode, this, + local->hashed_subvol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s on inode vol is %s", + local->loc.path, + local->hashed_subvol ? 
local->hashed_subvol->name : "NULL"); + goto err; + } + } if (local->hashed_subvol == NULL) { - local->op_errno = EINVAL; - gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, - "name=%s", loc->name, "path=%s", loc->path, NULL); - goto err; + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, + "name=%s", loc->name, "path=%s", loc->path, NULL); + goto err; + } } local->current = &local->lock[0]; @@ -1941,9 +1970,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, local->selfheal.dir_cbk = dir_cbk; local->selfheal.layout = dht_layout_ref(this, layout); - if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { - /*Use the one in the mds_stbuf*/ - local->stbuf = local->mds_stbuf; + if (local->need_attrheal) { + if (__is_root_gfid(local->stbuf.ia_gfid)) { + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; + + local->stbuf.ia_ctime = local->prebuf.ia_ctime; + local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec; + local->stbuf.ia_prot = local->prebuf.ia_prot; + + } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) { + local->stbuf = local->mds_stbuf; + } } if (!__is_root_gfid(local->stbuf.ia_gfid)) { @@ -2145,6 +2183,15 @@ dht_dir_heal_xattrs(void *data) if (subvol == mds_subvol) continue; if (uret || uflag) { + /* Custom xattr heal is required - let posix handle it */ + ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", local->loc.path, "key=%s", + "sync_backend_xattrs", NULL); + goto out; + } + ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata, NULL); if (ret) { @@ -2153,6 +2200,8 @@ dht_dir_heal_xattrs(void *data) 
DHT_MSG_DIR_XATTR_HEAL_FAILED, "set-user-xattr-failed path=%s", local->loc.path, "subvol=%s", subvol->name, "gfid=%s", gfid, NULL); + } else { + dict_del(xdata, "sync_backend_xattrs"); } } } diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index d85b4d1ce13..bb72b0ffbb5 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -140,9 +140,9 @@ dht_priv_dump(xlator_t *this) } } - if (conf->last_stat_fetch.tv_sec) + if (conf->last_stat_fetch) gf_proc_dump_write("last_stat_fetch", "%s", - ctime(&conf->last_stat_fetch.tv_sec)); + ctime(&conf->last_stat_fetch)); UNLOCK(&conf->subvolume_lock); @@ -537,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, pattern_str = strtok_r(data, ",", &tmp_str); while (pattern_str) { dup_str = gf_strdup(pattern_str); + if (!dup_str) + goto out; pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); if (!pattern_list) { goto out; @@ -596,7 +598,6 @@ dht_init_methods(xlator_t *this) methods = &(conf->methods); methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; methods->migration_other = NULL; methods->layout_search = dht_layout_search; @@ -1045,84 +1046,6 @@ struct volume_options dht_options[] = { /* NUFA option */ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, - /* tier options */ - { - .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { - .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { - .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { - .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - - { - .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - 
.default_value = "90", - }, - { - .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { - .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { - .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - {.key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system"}, - {.key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system"}, - { - .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { - .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { - .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, /* switch option */ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 59313639c45..3648a564840 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -595,7 +595,6 @@ nufa_init(xlator_t *this) dht_methods_t dht_methods = { .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, .layout_search = dht_layout_search, }; diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c index 9d712b359a0..703a30e2485 100644 --- a/xlators/cluster/ec/src/ec-combine.c +++ b/xlators/cluster/ec/src/ec-combine.c @@ -343,9 +343,8 @@ out: } static int32_t -ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which, - char *key, char *new_key, const char *def, - gf_boolean_t global, ...) 
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key, + const char *def, gf_boolean_t global, const char *fmt, ...) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -357,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which, ec_dict_list(data, cbk, which, key, global); - va_start(args, global); + va_start(args, fmt); err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args); va_end(args); @@ -730,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg) if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) || (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) { - return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key, - NULL, NULL, _gf_false, + return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL, + _gf_false, _gf_false, "(<EC:%s> { })", data->cbk->fop->xl->name); } if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) { - return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL, - NULL, _gf_false); + return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL, + _gf_false, "{\n}"); } if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) { @@ -767,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg) if (XATTR_IS_NODE_UUID(key)) { if (data->cbk->fop->int32) { /* List of node uuid is requested */ - return ec_dict_data_concat("{ }", data->cbk, data->which, key, + return ec_dict_data_concat(data->cbk, data->which, key, GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR, - _gf_true); + _gf_true, "{ }"); } else { return ec_dict_data_uuid(data->cbk, data->which, key); } diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index ded34b81aa2..b955efd8c2d 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) int32_t 
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, - uintptr_t bad, dict_t *xdata) + uintptr_t bad, uint32_t pending, dict_t *xdata) { if (op_ret < 0) { gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, @@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop) } } - gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, - "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s, %s)", - gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, - ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), - ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), - ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), - ec_bin(str4, sizeof(str4), fop->good, ec->nodes), - ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), - ec->nodes), - ec_msg_str(fop)); + gf_msg( + fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " + "remaining=%s, good=%s, bad=%s," + "(Least significant bit represents first client/brick of subvol), %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), + ec->nodes), + ec_msg_str(fop)); if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, @@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop) loc_t *loc2 = NULL; char gfid1[64] = {0}; char gfid2[64] = {0}; + ec_fop_data_t *parent = fop->parent; if (fop->errstr) return fop->errstr; - if (!fop->use_fd) { loc1 = &fop->loc[0]; loc2 = &fop->loc[1]; @@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop) if (fop->id == 
GF_FOP_RENAME) { gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' and '%s' with gfids " - "%s and %s respectively", + "%s and %s respectively. Parent FOP: %s", ec_fop_name(fop->id), loc1->path, loc2->path, uuid_utoa_r(loc1->gfid, gfid1), - uuid_utoa_r(loc2->gfid, gfid2)); + uuid_utoa_r(loc2->gfid, gfid2), + parent ? ec_fop_name(parent->id) : "No Parent"); } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", - ec_fop_name(fop->id), loc1->path, - uuid_utoa_r(loc1->gfid, gfid1)); + gf_asprintf( + &fop->errstr, + "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), loc1->path, + uuid_utoa_r(loc1->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", - ec_fop_name(fop->id), - uuid_utoa_r(fop->fd->inode->gfid, gfid1)); + gf_asprintf( + &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } return fop->errstr; } +static void +ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need, + int32_t loglevel) +{ + ec_t *ec = fop->xl->private; + char str1[32], str2[32], str3[32]; + + gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children for this request: " + "Have : %d, Need : %u : Child UP : %s " + "Mask: %s, Healing : %s : %s ", + have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->healing, ec->nodes), + ec_msg_str(fop)); +} + static int32_t ec_child_select(ec_fop_data_t *fop) { @@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop) ec_trace("SELECT", fop, ""); if ((num < fop->minimum) && (num < ec->fragments)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). 
%s", - num, fop->minimum, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR); return 0; } @@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop) (fop->locks[0].update[EC_DATA_TXN] || fop->locks[0].update[EC_METADATA_TXN])) { if (ec->quorum_count && (num < ec->quorum_count)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, ec->quorum_count, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR); return 0; } } diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index ef6b06fa4dd..f71dcfac293 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -386,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) /* Return error if opendir has not been successfully called on * any subvolume. */ ctx = ec_fd_get(fop->fd, fop->xl); - if ((ctx == NULL) || (ctx->open == 0)) { - fop->error = EINVAL; + if (ctx == NULL) { + fop->error = ENOMEM; + } else if (ctx->open == 0) { + fop->error = EBADFD; + } + if (fop->error) { + gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error, + EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s", + ec_msg_str(fop)); return EC_STATE_REPORT; } diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 81f6add5bb0..7d991f04aac 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -70,6 +70,7 @@ struct ec_name_data { char *name; inode_t *parent; default_args_cbk_t *replies; + uint32_t heal_pending; }; static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; @@ -994,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, ret = -ENOTCONN; goto out; } + out: if (xattr) dict_unref(xattr); @@ -1172,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, dict_t 
*xdata = NULL; char *linkname = NULL; ec_config_t config; + /* There should be just one gfid key */ EC_REPLIES_ALLOC(replies, ec->nodes); if (gfid_db->count != 1) { @@ -1416,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent, participants); + if (ret >= 0) { + /* If ec_create_name() succeeded we return 1 to indicate that a new + * file has been created and it will need to be healed. */ + ret = 1; + } out: cluster_replies_wipe(replies, ec->nodes); loc_wipe(&loc); @@ -1493,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, name_on); - if (ret < 0) + if (ret < 0) { memset(name_on, 0, ec->nodes); + } else { + name_data->heal_pending += ret; + } for (i = 0; i < ec->nodes; i++) if (name_data->participants[i] && !name_on[i]) name_data->failed_on[i] = 1; + return 0; } int ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *participants) + unsigned char *participants, uint32_t *pending) { int i = 0; int j = 0; @@ -1517,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, name_data.frame = frame; name_data.participants = participants; name_data.failed_on = alloca0(ec->nodes); - ; + name_data.heal_pending = 0; for (i = 0; i < ec->nodes; i++) { if (!participants[i]) @@ -1536,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, break; } } + *pending += name_data.heal_pending; + loc_wipe(&loc); return ret; } @@ -1543,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, int __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, unsigned char *heal_on, unsigned char *sources, - unsigned char *healed_sinks) + unsigned char *healed_sinks, uint32_t *pending) { unsigned char *locked_on = NULL; unsigned char *output = NULL; @@ -1588,7 +1602,7 @@ unlock: if (sources[i] || 
healed_sinks[i]) participants[i] = 1; } - ret = ec_heal_names(frame, ec, inode, participants); + ret = ec_heal_names(frame, ec, inode, participants, pending); if (EC_COUNT(participants, ec->nodes) <= ec->fragments) goto out; @@ -1609,7 +1623,8 @@ out: int ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *sources, unsigned char *healed_sinks) + unsigned char *sources, unsigned char *healed_sinks, + uint32_t *pending) { unsigned char *locked_on = NULL; unsigned char *up_subvols = NULL; @@ -1640,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, goto unlock; } ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, - healed_sinks); + healed_sinks, pending); } unlock: cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, @@ -1961,14 +1976,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0, (heal->good | heal->bad), heal->good, heal->bad, - NULL); + 0, NULL); } return EC_STATE_END; case -EC_STATE_REPORT: if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1, - fop->error, 0, 0, 0, NULL); + fop->error, 0, 0, 0, 0, NULL); } return EC_STATE_END; @@ -2005,14 +2020,15 @@ out: if (fop != NULL) { ec_manager(fop, error); } else { - func(frame, heal, this, -1, error, 0, 0, 0, NULL); + func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL); } } int32_t ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) { ec_heal_t *heal = cookie; @@ -2481,6 +2497,58 @@ out: return ret; } +int +ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) +{ + int i = 0; + int ret = 0; + dict_t **xattr = NULL; + loc_t loc = {0}; + uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; + unsigned char *on = NULL; + 
default_args_cbk_t *replies = NULL; + dict_t *dict = NULL; + + /* Allocate the required memory */ + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + on = alloca0(ec->nodes); + EC_REPLIES_ALLOC(replies, ec->nodes); + xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); + if (!xattr) { + ret = -ENOMEM; + goto out; + } + dict = dict_new(); + if (!dict) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < ec->nodes; i++) { + xattr[i] = dict; + on[i] = 1; + } + ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, + ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64, + xattr, NULL); +out: + if (dict) { + dict_unref(dict); + } + if (xattr) { + GF_FREE(xattr); + } + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return ret; +} + void ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) { @@ -2498,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) intptr_t mbad = 0; intptr_t good = 0; intptr_t bad = 0; + uint32_t pending = 0; ec_fop_data_t *fop = data; gf_boolean_t blocking = _gf_false; ec_heal_need_t need_heal = EC_HEAL_NONEED; @@ -2533,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) if (loc->name && strlen(loc->name)) { ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, participants); - if (ret == 0) { + if (ret >= 0) { gf_msg_debug(this->name, 0, "%s: name heal " "successful on %" PRIXPTR, @@ -2551,23 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) /* Mount triggers heal only when it detects that it must need heal, shd * triggers heals periodically which need not be thorough*/ - if (ec->shd.iamshd) { + if (ec->shd.iamshd && (ret <= 0)) { ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, &need_heal); - if 
(need_heal == EC_HEAL_NONEED) { + if (need_heal == EC_HEAL_PURGE_INDEX) { + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "Index entry needs to be purged for: %s ", + uuid_utoa(loc->gfid)); + /* We need to send zero-xattrop so that stale index entry could be + * removed. We need not take lock on this entry to do so as + * xattrop on a brick is atomic. */ + ec_heal_purge_stale_index(frame, ec, loc->inode); + goto out; + } else if (need_heal == EC_HEAL_NONEED) { gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, "Heal is not required for : %s ", uuid_utoa(loc->gfid)); goto out; } } + sources = alloca0(ec->nodes); healed_sinks = alloca0(ec->nodes); if (IA_ISREG(loc->inode->ia_type)) { ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, healed_sinks); } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { - ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); + ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks, + &pending); } else { ret = 0; memcpy(sources, participants, ec->nodes); @@ -2597,10 +2677,11 @@ out: if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno, ec_char_array_to_mask(participants, ec->nodes), - mgood & good, mbad & bad, NULL); + mgood & good, mbad & bad, pending, NULL); } if (frame) STACK_DESTROY(frame->root); + return; } @@ -2648,7 +2729,7 @@ ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) { if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0, - 0, NULL); + 0, 0, NULL); } ec_fop_data_release(fop); } @@ -2835,7 +2916,7 @@ fail: if (fop) ec_fop_data_release(fop); if (func) - func(frame, data, this, -1, err, 0, 0, 0, NULL); + func(frame, data, this, -1, err, 0, 0, 0, 0, NULL); } int @@ -2964,6 +3045,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, goto out; } } + /* If lock count is 0, all dirty flags are 0 and all the + * versions are macthing then why are we here. 
It looks + * like something went wrong while removing the index entries + * after completing a successful heal or fop. In this case + * we need to remove this index entry to avoid triggering heal + * in a loop and causing lookups again and again*/ + *need_heal = EC_HEAL_PURGE_INDEX; } else { for (i = 0; i < ec->nodes; i++) { /* Since each lock can only increment the dirty diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index 956e73c2088..5c1586bc9c5 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer) ec = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + ec->shd.timeout; + wait_till.tv_sec = gf_time() + ec->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -156,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) return ret; } +static gf_boolean_t +ec_is_heal_completed(char *status) +{ + char *bad_pos = NULL; + char *zero_pos = NULL; + + if (!status) { + return _gf_false; + } + + /*Logic: + * Status will be of the form Good: <binary>, Bad: <binary> + * If heal completes, if we do strchr for '0' it should be present after + * 'Bad:' i.e. 
strRchr for ':' + * */ + + zero_pos = strchr(status, '0'); + bad_pos = strrchr(status, ':'); + if (!zero_pos || !bad_pos) { + /*malformed status*/ + return _gf_false; + } + + if (zero_pos > bad_pos) { + return _gf_true; + } + + return _gf_false; +} + int ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, gf_boolean_t full) { + dict_t *xdata = NULL; + dict_t *dict = NULL; + uint32_t count; int32_t ret; + char *heal_status = NULL; + ec_t *ec = healer->this->private; + + GF_ATOMIC_INC(ec->stats.shd.attempted); + ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL, + &xdata); + if (ret == 0) { + if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) { + if (ec_is_heal_completed(heal_status)) { + GF_ATOMIC_INC(ec->stats.shd.completed); + } + } + } - ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); - if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { + if (!full && (loc->inode->ia_type == IA_IFDIR)) { /* If we have just healed a directory, it's possible that - * other index entries have appeared to be healed. We put a - * mark so that we can check it later and restart a scan - * without delay. */ - healer->rerun = _gf_true; + * other index entries have appeared to be healed. */ + if ((xdata != NULL) && + (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) && + (count > 0)) { + /* Force a rerun of the index healer. 
*/ + gf_msg_debug(healer->this->name, 0, "%d more entries to heal", + count); + + healer->rerun = _gf_true; + } + } + + if (xdata != NULL) { + dict_unref(xdata); + } + + if (dict) { + dict_unref(dict); } return ret; diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index a891ccd0952..dad5f4d7018 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -390,7 +390,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) { fop_getxattr_cbk_t func = cookie; ec_t *ec = xl->private; @@ -398,6 +399,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, char *str; char bin1[65], bin2[65]; + /* We try to return the 'pending' information in xdata, but if this cannot + * be set, we will ignore it silently. We prefer to report the success or + * failure of the heal itself. */ + if (xdata == NULL) { + xdata = dict_new(); + } else { + dict_ref(xdata); + } + if (xdata != NULL) { + if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) { + /* dict_set_uint32() is marked as 'warn_unused_result' and gcc + * enforces to check the result in this case. However we don't + * really care if it succeeded or not. We'll just do the same. + * + * This empty 'if' avoids the warning, and it will be removed by + * the optimizer. 
*/ + } + } + if (op_ret >= 0) { dict = dict_new(); if (dict == NULL) { @@ -431,11 +451,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, } out: - func(frame, NULL, xl, op_ret, op_errno, dict, NULL); + func(frame, NULL, xl, op_ret, op_errno, dict, xdata); if (dict != NULL) { dict_unref(dict); } + if (xdata != NULL) { + dict_unref(xdata); + } return 0; } diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index 8e84977d2b3..601960d6154 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -24,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) ec_t *ec = fop->xl->private; ec_cbk_data_t *ans = NULL; ec_cbk_data_t *cbk = NULL; - uintptr_t locked = 0, notlocked = 0; + uintptr_t locked = 0; + int32_t good = 0; + int32_t eagain = 0; + int32_t estale = 0; int32_t error = -1; + /* There are some errors that we'll handle in an special way while trying + * to acquire a lock. + * + * EAGAIN: If it's found during a parallel non-blocking lock request, we + * consider that there's contention on the inode, so we consider + * the acquisition a failure and try again with a sequential + * blocking lock request. This will ensure that we get a lock on + * as many bricks as possible (ignoring EAGAIN here would cause + * unnecessary triggers of self-healing). + * + * If it's found during a sequential blocking lock request, it's + * considered an error. Lock will only succeed if there are + * enough other bricks locked. + * + * ESTALE: This can appear during parallel or sequential lock request if + * the inode has just been unlinked. We consider this error is + * not recoverable, but we also don't consider it as fatal. 
So, + * if it happens during parallel lock, we won't attempt a + * sequential one unless there are EAGAIN errors on other + * bricks (and are enough to form a quorum), but if we reach + * quorum counting the ESTALE bricks, we consider the whole + * result of the operation is ESTALE instead of EIO. + */ + list_for_each_entry(ans, &fop->cbk_list, list) { if (ans->op_ret >= 0) { @@ -34,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) error = EIO; } locked |= ans->mask; + good = ans->count; cbk = ans; - } else { - if (ans->op_errno == EAGAIN) { - switch (fop->uint32) { - case EC_LOCK_MODE_NONE: - case EC_LOCK_MODE_ALL: - /* Goal is to treat non-blocking lock as failure - * even if there is a single EAGAIN*/ - notlocked |= ans->mask; - break; - } - } + } else if (ans->op_errno == ESTALE) { + estale += ans->count; + } else if ((ans->op_errno == EAGAIN) && + (fop->uint32 != EC_LOCK_MODE_INC)) { + eagain += ans->count; } } if (error == -1) { - if (gf_bits_count(locked | notlocked) >= ec->fragments) { - if (notlocked == 0) { + /* If we have enough quorum with succeeded and EAGAIN answers, we + * ignore for now any ESTALE answer. If there are EAGAIN answers, + * we retry with a sequential blocking lock request if needed. + * Otherwise we succeed. */ + if ((good + eagain) >= ec->fragments) { + if (eagain == 0) { if (fop->answer == NULL) { fop->answer = cbk; } @@ -64,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) case EC_LOCK_MODE_NONE: error = EAGAIN; break; - case EC_LOCK_MODE_ALL: fop->uint32 = EC_LOCK_MODE_INC; break; - default: + /* This shouldn't happen because eagain cannot be > 0 + * when fop->uint32 is EC_LOCK_MODE_INC. */ error = EIO; break; } } } else { - if (fop->answer && fop->answer->op_ret < 0) + /* We have been unable to find enough candidates that will be able + * to take the lock. If we have quorum on some answer, we return + * it. Otherwise we check if ESTALE answers allow us to reach + * quorum. If so, we return ESTALE. 
*/ + if (fop->answer && fop->answer->op_ret < 0) { error = fop->answer->op_errno; - else + } else if ((good + eagain + estale) >= ec->fragments) { + error = ESTALE; + } else { error = EIO; + } } } diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 7829b8c27b3..de9b89bb2c9 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t); enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX }; -enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST }; +enum _ec_heal_need { + EC_HEAL_NONEED, + EC_HEAL_MAYBE, + EC_HEAL_MUST, + EC_HEAL_PURGE_INDEX +}; enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL }; @@ -186,10 +191,10 @@ struct _ec_inode { typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, int32_t, uintptr_t, uintptr_t, uintptr_t, - dict_t *); + uint32_t, dict_t *); typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, int32_t, uintptr_t, uintptr_t, uintptr_t, - dict_t *); + uint32_t, dict_t *); union _ec_cbk { fop_access_cbk_t access; @@ -621,6 +626,11 @@ struct _ec_statistics { requests. (Basically memory allocation errors). 
*/ } stripe_cache; + struct { + gf_atomic_t attempted; /*Number of heals attempted on + files/directories*/ + gf_atomic_t completed; /*Number of heals complted on files/directories*/ + } shd; }; struct _ec { diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 66b4e634911..7344be4968d 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec) void ec_up(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 1; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, + "Going UP : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); } @@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec) void ec_down(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 0; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, + "Going DOWN : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); } @@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec) GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); + GF_ATOMIC_INIT(ec->stats.shd.attempted, 0); + GF_ATOMIC_INIT(ec->stats.shd.completed, 0); } static int @@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this) GF_ATOMIC_GET(ec->stats.stripe_cache.allocs)); gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC, 
GF_ATOMIC_GET(ec->stats.stripe_cache.errors)); + gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.attempted)); + gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.completed)); return 0; } diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index 1b210d9adc1..6f6de6d5981 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -18,6 +18,7 @@ #define EC_XATTR_SIZE EC_XATTR_PREFIX "size" #define EC_XATTR_VERSION EC_XATTR_PREFIX "version" #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" +#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" #define EC_STRIPE_CACHE_MAX_SIZE 10 #define EC_VERSION_SIZE 2 diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c index ff993f7b5e5..d45655ef4c3 100644 --- a/xlators/debug/error-gen/src/error-gen.c +++ b/xlators/debug/error-gen/src/error-gen.c @@ -31,9 +31,9 @@ sys_error_t error_no_list[] = { [GF_FOP_LOOKUP] = {.error_no_count = 4, .error_no = {ENOENT, ENOTDIR, ENAMETOOLONG, EAGAIN}}, - [GF_FOP_STAT] = {.error_no_count = 7, - .error_no = {EACCES, EBADF, EFAULT, ENAMETOOLONG, ENOENT, - ENOMEM, ENOTDIR}}, + [GF_FOP_STAT] = {.error_no_count = 6, + .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM, + ENOTDIR}}, [GF_FOP_READLINK] = {.error_no_count = 8, .error_no = {EACCES, EFAULT, EINVAL, EIO, ENAMETOOLONG, ENOENT, ENOMEM, ENOTDIR}}, @@ -79,21 +79,20 @@ sys_error_t error_no_list[] = { [GF_FOP_WRITE] = {.error_no_count = 7, .error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG, ENOSPC, GF_ERROR_SHORT_WRITE}}, - [GF_FOP_STATFS] = {.error_no_count = 10, - .error_no = {EACCES, EBADF, EFAULT, EINTR, EIO, - ENAMETOOLONG, ENOENT, ENOMEM, ENOSYS, - ENOTDIR}}, + [GF_FOP_STATFS] = {.error_no_count = 9, + .error_no = {EACCES, EFAULT, EINTR, EIO, ENAMETOOLONG, + ENOENT, ENOMEM, ENOSYS, ENOTDIR}}, [GF_FOP_FLUSH] = {.error_no_count = 5, 
.error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS, ENOENT}}, [GF_FOP_FSYNC] = {.error_no_count = 4, .error_no = {EBADF, EIO, EROFS, EINVAL}}, - [GF_FOP_SETXATTR] = {.error_no_count = 4, - .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}}, - [GF_FOP_GETXATTR] = {.error_no_count = 4, - .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}, - [GF_FOP_REMOVEXATTR] = {.error_no_count = 4, - .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}, + [GF_FOP_SETXATTR] = {.error_no_count = 3, + .error_no = {EACCES, EINTR, ENAMETOOLONG}}, + [GF_FOP_GETXATTR] = {.error_no_count = 3, + .error_no = {EACCES, ENAMETOOLONG, EINTR}}, + [GF_FOP_REMOVEXATTR] = {.error_no_count = 3, + .error_no = {EACCES, ENAMETOOLONG, EINTR}}, [GF_FOP_FSETXATTR] = {.error_no_count = 4, .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}}, [GF_FOP_FGETXATTR] = {.error_no_count = 4, @@ -125,26 +124,25 @@ sys_error_t error_no_list[] = { ENOENT}}, [GF_FOP_FXATTROP] = {.error_no_count = 4, .error_no = {EBADF, EIO, EROFS, EINVAL}}, - [GF_FOP_INODELK] = {.error_no_count = 4, - .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}}, + [GF_FOP_INODELK] = {.error_no_count = 3, + .error_no = {EACCES, EINTR, ENAMETOOLONG}}, [GF_FOP_FINODELK] = {.error_no_count = 4, .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}}, - [GF_FOP_ENTRYLK] = {.error_no_count = 4, - .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}, + [GF_FOP_ENTRYLK] = {.error_no_count = 3, + .error_no = {EACCES, ENAMETOOLONG, EINTR}}, [GF_FOP_FENTRYLK] = {.error_no_count = 10, .error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE, ENAMETOOLONG, ENFILE, ENODEV, ENOENT, ENOMEM}}, - [GF_FOP_SETATTR] = {.error_no_count = 11, + [GF_FOP_SETATTR] = {.error_no_count = 10, .error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT, - ENOMEM, ENOTDIR, EPERM, EROFS, EBADF, - EIO}}, + ENOMEM, ENOTDIR, EPERM, EROFS, EIO}}, [GF_FOP_FSETATTR] = {.error_no_count = 11, .error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT, ENOMEM, ENOTDIR, EPERM, EROFS, EBADF, EIO}}, - [GF_FOP_GETSPEC] = 
{.error_no_count = 4, - .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}}; + [GF_FOP_GETSPEC] = {.error_no_count = 3, + .error_no = {EACCES, ENAMETOOLONG, EINTR}}}; int generate_rand_no(int op_no) @@ -1509,8 +1507,8 @@ init(xlator_t *this) this->private = pvt; - /* Give some seed value here */ - srand(time(NULL)); + /* Give some seed value here. */ + srand(gf_time()); ret = 0; out: diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 4e1f6e5af07..aa00c446e5a 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -135,7 +135,7 @@ struct ios_global_stats { gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE]; gf_atomic_t fop_hits[GF_FOP_MAXVALUE]; gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE]; - struct timeval started_at; + time_t started_at; struct ios_lat latency[GF_FOP_MAXVALUE]; uint64_t nr_opens; uint64_t max_nr_opens; @@ -292,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame) begin = &frame->begin; \ end = &frame->end; \ \ - elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \ - (end->tv_nsec - begin->tv_nsec)) / \ - 1000; \ + elapsed = gf_tsdiff(begin, end) / 1000.0; \ throughput = op_ret / elapsed; \ \ conf = this->private; \ @@ -678,10 +676,7 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this, FILE *logfp, ios_stats_thru_t type) { struct ios_stat_list *entry = NULL; - struct timeval time = { - 0, - }; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -689,12 +684,9 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this, { list_for_each_entry(entry, &list_head->iosstats->list, list) { - gf_time_fmt(timestr, sizeof timestr, - entry->iosstat->thru_counters[type].time.tv_sec, - gf_timefmt_FT); - snprintf(timestr + strlen(timestr), - sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS, - time.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, + &entry->iosstat->thru_counters[type].time, + 
gf_timefmt_FT); ios_log(this, logfp, "%s \t %-10.2f \t %s", timestr, entry->value, entry->iosstat->filename); @@ -773,9 +765,8 @@ err: int io_stats_dump_global_to_json_logfp(xlator_t *this, - struct ios_global_stats *stats, - struct timeval *now, int interval, - FILE *logfp) + struct ios_global_stats *stats, time_t now, + int interval, FILE *logfp) { int i = 0; int j = 0; @@ -801,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this, }; dict_t *xattr = NULL; - interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) - - (stats->started_at.tv_sec * 1000000.0 + - stats->started_at.tv_usec)) / - 1000000.0; + interval_sec = (double)(now - stats->started_at); conf = this->private; @@ -956,8 +944,8 @@ io_stats_dump_global_to_json_logfp(xlator_t *this, } if (interval == -1) { - ios_log(this, logfp, "\"%s.%s.uptime\": %" PRId64 ",", key_prefix, - str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec)); + ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix, + str_prefix, (uint64_t)(now - stats->started_at)); ios_log(this, logfp, "\"%s.%s.bytes_read\": " "%" GF_PRI_ATOMIC ",", @@ -1209,14 +1197,14 @@ out: int io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, FILE *logfp) + time_t now, int interval, FILE *logfp) { int i = 0; int per_line = 0; int index = 0; struct ios_stat_head *list_head = NULL; struct ios_conf *conf = NULL; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char str_header[128] = {0}; @@ -1232,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, ios_log(this, logfp, "\n=== Cumulative stats ==="); else ios_log(this, logfp, "\n=== Interval %d stats ===", interval); - ios_log(this, logfp, " Duration : %" PRId64 " secs", - (uint64_t)(now->tv_sec - stats->started_at.tv_sec)); + ios_log(this, logfp, " Duration : %" PRIu64 " secs", + (uint64_t)(now - stats->started_at)); ios_log(this, logfp, " BytesRead : %" 
GF_PRI_ATOMIC, GF_ATOMIC_GET(stats->data_read)); ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n", @@ -1325,11 +1313,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, if (interval == -1) { LOCK(&conf->lock); { - gf_time_fmt(timestr, sizeof timestr, - conf->cumulative.max_openfd_time.tv_sec, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), - sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS, - conf->cumulative.max_openfd_time.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, + &conf->cumulative.max_openfd_time, gf_timefmt_FT); ios_log(this, logfp, "Current open fd's: %" PRId64 " Max open fd's: %" PRId64 " time %s", @@ -1381,7 +1366,7 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats, int io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, dict_t *dict) + time_t now, int interval, dict_t *dict) { int ret = 0; char key[64] = {0}; @@ -1407,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats, interval); snprintf(key, sizeof(key), "%d-duration", interval); - sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec); + sec = now - stats->started_at; ret = dict_set_uint64(dict, key, sec); if (ret) { gf_log(this->name, GF_LOG_ERROR, @@ -1530,9 +1515,8 @@ out: } int -io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, - struct ios_dump_args *args) +io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now, + int interval, struct ios_dump_args *args) { int ret = -1; @@ -1590,13 +1574,13 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type, } static void -ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now) +ios_global_stats_clear(struct ios_global_stats *stats, time_t now) { GF_ASSERT(stats); GF_ASSERT(now); memset(stats, 0, sizeof(*stats)); - stats->started_at = *now; + 
stats->started_at = now; } int @@ -1607,7 +1591,7 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, struct ios_global_stats cumulative = {}; struct ios_global_stats incremental = {}; int increment = 0; - struct timeval now; + time_t now = 0; GF_ASSERT(this); GF_ASSERT(args); @@ -1615,8 +1599,8 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX); conf = this->private; + now = gf_time(); - gettimeofday(&now, NULL); LOCK(&conf->lock); { if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE) @@ -1629,17 +1613,17 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op, if (!is_peek) { increment = conf->increment++; - ios_global_stats_clear(&conf->incremental, &now); + ios_global_stats_clear(&conf->incremental, now); } } } UNLOCK(&conf->lock); if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE) - io_stats_dump_global(this, &cumulative, &now, -1, args); + io_stats_dump_global(this, &cumulative, now, -1, args); if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL) - io_stats_dump_global(this, &incremental, &now, increment, args); + io_stats_dump_global(this, &incremental, now, increment, args); return 0; } @@ -1649,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd) { struct ios_conf *conf = NULL; struct timeval now; - uint64_t sec = 0; - uint64_t usec = 0; int i = 0; + double usecs = 0; uint64_t data_read = 0; uint64_t data_written = 0; uint64_t block_count_read = 0; @@ -1666,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd) return 0; gettimeofday(&now, NULL); - - if (iosfd->opened_at.tv_usec > now.tv_usec) { - now.tv_usec += 1000000; - now.tv_usec--; - } - - sec = now.tv_sec - iosfd->opened_at.tv_sec; - usec = now.tv_usec - iosfd->opened_at.tv_usec; + usecs = gf_tvdiff(&iosfd->opened_at, &now); gf_log(this->name, GF_LOG_INFO, "--- fd stats ---"); if (iosfd->filename) gf_log(this->name, GF_LOG_INFO, " 
Filename : %s", iosfd->filename); - if (sec) - gf_log(this->name, GF_LOG_INFO, - " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec); + if (usecs) + gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs); data_read = GF_ATOMIC_GET(iosfd->data_read); if (data_read) @@ -1785,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame, begin = &frame->begin; end = &frame->end; - elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + - (end->tv_nsec - begin->tv_nsec)) / - 1000; + elapsed = gf_tsdiff(begin, end) / 1000.0; update_ios_latency_stats(&conf->cumulative, elapsed, op); update_ios_latency_stats(&conf->incremental, elapsed, op); @@ -1808,7 +1781,7 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp, struct ios_stat_list *entry = NULL; int ret = -1; ios_stats_thru_t index = IOS_STATS_THRU_MAX; - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char *dict_timestr = NULL; @@ -1827,14 +1800,9 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp, ret = dict_set_uint64(resp, "max-open", conf->cumulative.max_nr_opens); - gf_time_fmt(timestr, sizeof timestr, - conf->cumulative.max_openfd_time.tv_sec, - gf_timefmt_FT); - if (conf->cumulative.max_openfd_time.tv_sec) - snprintf(timestr + strlen(timestr), - sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, - conf->cumulative.max_openfd_time.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, + &conf->cumulative.max_openfd_time, + gf_timefmt_FT); dict_timestr = gf_strdup(timestr); if (!dict_timestr) @@ -3606,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf) return; } -static int +static void io_stats_clear(struct ios_conf *conf) { - struct timeval now; - int ret = -1; + time_t now = 0; GF_ASSERT(conf); + now = gf_time(); - if (!gettimeofday(&now, NULL)) { - LOCK(&conf->lock); - { - ios_global_stats_clear(&conf->cumulative, &now); - ios_global_stats_clear(&conf->incremental, &now); - conf->increment = 0; - } - UNLOCK(&conf->lock); - ret = 0; + 
LOCK(&conf->lock); + { + ios_global_stats_clear(&conf->cumulative, now); + ios_global_stats_clear(&conf->incremental, now); + conf->increment = 0; } - - return ret; + UNLOCK(&conf->lock); } int32_t @@ -3866,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf) _ios_destroy_dump_thread(conf); ios_destroy_sample_buf(conf->ios_sample_buf); LOCK_DESTROY(&conf->lock); - GF_FREE(conf->dnscache); + gf_dnscache_deinit(conf->dnscache); GF_FREE(conf); } @@ -3889,7 +3852,7 @@ ios_init_stats(struct ios_global_stats *stats) for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) GF_ATOMIC_INIT(stats->upcall_hits[i], 0); - gettimeofday(&stats->started_at, NULL); + stats->started_at = gf_time(); } int @@ -3978,11 +3941,14 @@ init(xlator_t *this) gf_log(this->name, GF_LOG_ERROR, "Out of memory."); goto out; } - ret = -1; GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32, out); conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec); + if (!conf->dnscache) { + ret = -1; + goto out; + } GF_OPTION_INIT("sys-log-level", sys_log_str, str, out); if (sys_log_str) { @@ -4133,12 +4099,9 @@ notify(xlator_t *this, int32_t event, void *data, ...) } if (GF_IOS_INFO_CLEAR == op) { - ret = io_stats_clear(this->private); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "Failed to clear info stats"); + io_stats_clear(this->private); - ret = dict_set_int32(output, "stats-cleared", ret ? 
0 : 1); + ret = dict_set_int32(output, "stats-cleared", 1); if (ret) gf_log(this->name, GF_LOG_ERROR, "Failed to set stats-cleared" diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c index 3db2e263524..6ed0ca00342 100644 --- a/xlators/debug/trace/src/trace.c +++ b/xlators/debug/trace/src/trace.c @@ -22,13 +22,13 @@ static void trace_stat_to_str(struct iatt *buf, char *str, size_t len) { - char atime_buf[200] = { + char atime_buf[GF_TIMESTR_SIZE] = { 0, }; - char mtime_buf[200] = { + char mtime_buf[GF_TIMESTR_SIZE] = { 0, }; - char ctime_buf[200] = { + char ctime_buf[GF_TIMESTR_SIZE] = { 0, }; @@ -64,7 +64,7 @@ trace_stat_to_str(struct iatt *buf, char *str, size_t len) int dump_history_trace(circular_buffer_t *cb, void *data) { - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -72,9 +72,7 @@ dump_history_trace(circular_buffer_t *cb, void *data) gettimeofday () fails, it's safe to check tm and then dump the time at which the entry was added to the buffer */ - gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_Ymd_T); - snprintf(timestr + strlen(timestr), 256 - strlen(timestr), - ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec); + gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_Ymd_T); gf_proc_dump_write("TIME", "%s", timestr); gf_proc_dump_write("FOP", "%s\n", (char *)cb->data); @@ -2209,10 +2207,10 @@ int trace_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - char actime_str[256] = { + char actime_str[GF_TIMESTR_SIZE] = { 0, }; - char modtime_str[256] = { + char modtime_str[GF_TIMESTR_SIZE] = { 0, }; trace_conf_t *conf = NULL; @@ -2278,10 +2276,10 @@ int trace_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - char actime_str[256] = { + char actime_str[GF_TIMESTR_SIZE] = { 0, }; - char modtime_str[256] = { + char modtime_str[GF_TIMESTR_SIZE] = { 0, }; trace_conf_t *conf = NULL; diff 
--git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 194634b003d..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC CLOUDSYNC_DIR = cloudsync endif +if BUILD_METADISP + METADISP_DIR = metadisp +endif + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ compress changelog gfid-access snapview-client snapview-server trash \ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ - utime + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c index 34e20f9df11..5cef2ffa5e5 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat) } void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv) +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time) { if (!scrub_stat) return; pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_start_time = time; } pthread_mutex_unlock(&scrub_stat->lock); } void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv) + time_t time) { int lst_size = 0; @@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, pthread_mutex_lock(&scrub_stat->lock); { - scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + scrub_stat->scrub_end_time = time; - scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec - - scrub_stat->scrub_start_tv.tv_sec; + scrub_stat->scrub_duration = scrub_stat->scrub_end_time - + scrub_stat->scrub_start_time; snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h 
b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h index 24128b90a66..f022aa831eb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -15,20 +15,22 @@ #include <sys/time.h> #include <pthread.h> +#include <glusterfs/common-utils.h> + struct br_scrub_stats { - uint64_t scrubbed_files; /* Total number of scrubbed file */ + uint64_t scrubbed_files; /* Total number of scrubbed files. */ - uint64_t unsigned_files; /* Total number of unsigned file */ + uint64_t unsigned_files; /* Total number of unsigned files. */ - uint64_t scrub_duration; /* Duration of last scrub */ + uint64_t scrub_duration; /* Duration of last scrub. */ - char last_scrub_time[1024]; /*last scrub completion time */ + char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */ - struct timeval scrub_start_tv; /* Scrubbing starting time*/ + time_t scrub_start_time; /* Scrubbing starting time. */ - struct timeval scrub_end_tv; /* Scrubbing finishing time */ + time_t scrub_end_time; /* Scrubbing finishing time. */ - int8_t scrub_running; /* Scrub running or not */ + int8_t scrub_running; /* Whether scrub running or not. 
*/ pthread_mutex_t lock; }; @@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat); void br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat); void -br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv); +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time); void br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, - struct timeval *tv); + time_t time); #endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index d20ecc7cdbe..289dd53f610 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -601,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this) static void br_scrubber_log_time(xlator_t *this, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; br_private_t *priv = NULL; + time_t now = 0; + now = gf_time(); priv = this->private; - gettimeofday(&tv, NULL); - gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { - br_update_scrub_start_time(&priv->scrub_stat, &tv); + br_update_scrub_start_time(&priv->scrub_stat, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, "Scrubbing %s at %s", sfx, timestr); } else { - br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv); + br_update_scrub_finish_time(&priv->scrub_stat, timestr, now); gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, "Scrubbing %s at %s", sfx, timestr); } @@ -628,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx) static void br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx) { - char timestr[1024] = { - 0, - }; - struct timeval tv = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; - gettimeofday(&tv, NULL); - 
gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT); + now = gf_time(); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); if (strcasecmp(sfx, "started") == 0) { gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", @@ -919,10 +915,7 @@ br_fsscan_schedule(xlator_t *this) { uint32_t timo = 0; br_private_t *priv = NULL; - struct timeval tv = { - 0, - }; - char timestr[1024] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; struct br_scrubber *fsscrub = NULL; @@ -933,8 +926,7 @@ br_fsscan_schedule(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&tv, NULL); - scrub_monitor->boot = tv.tv_sec; + scrub_monitor->boot = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { @@ -975,12 +967,10 @@ int32_t br_fsscan_activate(xlator_t *this) { uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -989,7 +979,7 @@ br_fsscan_activate(xlator_t *this) fsscrub = &priv->fsscrub; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1003,7 +993,7 @@ br_fsscan_activate(xlator_t *this) } pthread_mutex_unlock(&scrub_monitor->donelock); - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); (void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo); _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); @@ -1020,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 
0; br_private_t *priv = NULL; struct br_scrubber *fsscrub = NULL; struct br_monitor *scrub_monitor = NULL; @@ -1037,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this) if (!fsscrub->frequency_reconf) return 0; - (void)gettimeofday(&now, NULL); + now = gf_time(); timo = br_fsscan_calculate_timeout(fsscrub->frequency); if (timo == 0) { gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, @@ -1045,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this) return -1; } - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1073,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this) { int32_t ret = 0; uint32_t timo = 0; - char timestr[1024] = { - 0, - }; - struct timeval now = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; + time_t now = 0; br_private_t *priv = NULL; struct br_monitor *scrub_monitor = NULL; priv = this->private; scrub_monitor = &priv->scrub_monitor; - (void)gettimeofday(&now, NULL); - + now = gf_time(); timo = BR_SCRUB_ONDEMAND; - - gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); pthread_mutex_lock(&scrub_monitor->donelock); { @@ -1799,7 +1783,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, tmp_count = total_count; for (j = 0; j < count; j++) { - len = snprintf(key, PATH_MAX, "quarantine-%d", j); + len = snprintf(key, sizeof(key), "quarantine-%d", j); ret = dict_get_strn(child_dict, key, len, &entry); if (ret) continue; @@ -1810,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, if ((len < 0) || (len >= PATH_MAX)) { continue; } - snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count); + snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count); ret = dict_set_dynstr_with_alloc(dict, main_key, tmp); if (!ret) diff --git 
a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 8d2b7f051da..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -44,7 +44,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, - BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED); + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); #define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" #define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" @@ -68,6 +69,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, "daemon. Unwinding the fop" #define BRS_MSG_SIGN_PREPARE_FAIL_STR \ "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. 
Unwinding the fop" #define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" #define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" #define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 605a5e4c3e4..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -424,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict, priv = this->private; br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, - sizeof(br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int @@ -436,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, - size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -854,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, op_errno = ENOMEM; dict = dict_new(); if (!dict) - goto done; + goto out; ret = br_stub_alloc_versions(&obuf, NULL, 0); - if (ret) - goto dealloc_dict; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } ret = br_stub_fd_versioning( this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - 
-dealloc_versions: - br_stub_dealloc_versions(obuf); -dealloc_dict: - dict_unref(dict); -done: +out: + if (dict) + dict_unref(dict); if (ret) { if (local) frame->local = NULL; @@ -1025,31 +1028,36 @@ static int br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; + int32_t ret = -1; size_t signaturelen = 0; br_signature_t *sbuf = NULL; if (!br_is_signature_type_valid(sign->signaturetype)) - goto error_return; + goto out; signaturelen = sign->signaturelen; ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); - if (ret) - goto error_return; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; - - return 0; - -dealloc_versions: - br_stub_dealloc_versions(sbuf); -error_return: - return -1; +out: + return ret; } static void diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 71fe1f032a0..e561997d858 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -242,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len) } int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer) +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) { char changelog_path[PATH_MAX + 1] = { 0, @@ 
-273,7 +272,7 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, goto out; } - len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts, + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, priv->rollover_count); if (len >= sizeof(x_value)) { ret = -1; @@ -382,8 +381,7 @@ out: } static int -changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, - unsigned long ts) +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ret = -1; int notify = 0; @@ -421,16 +419,14 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, priv->changelog_fd = -1; } - time_t time = (time_t)ts; - - /* Get GMT time */ - gmt = gmtime(&time); + /* Get GMT time. */ + gmt = gmtime(&ts); strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, priv->changelog_dir); - (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%lu", + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", priv->changelog_dir, yyyymmdd, ts); (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); @@ -593,7 +589,7 @@ out: * returns -1 on failure or error */ int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -723,7 +719,7 @@ out: * returns -1 on failure or error */ int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) { int ht_file_fd = -1; int ht_dir_fd = -1; @@ -741,12 +737,12 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) int32_t len = 0; gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, - "name=%lu", ts, NULL); + "name=%ld", ts, NULL); CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); /* get the htime file name in ht_file_path */ - len = snprintf(ht_file_path, 
PATH_MAX, "%s/%s.%lu", ht_dir_path, + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, HTIME_FILE_NAME, ts); if ((len < 0) || (len >= PATH_MAX)) { ret = -1; @@ -792,7 +788,7 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts) goto out; } - (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu", + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", HTIME_FILE_NAME, ts); if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, strlen(ht_file_bname), 0)) { @@ -963,8 +959,8 @@ out: } int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale) +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) { int ret = -1; @@ -985,21 +981,12 @@ changelog_entry_length() return sizeof(changelog_log_data_t); } -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) { - struct timeval tv = { - 0, - }; - cld->cld_type = CHANGELOG_TYPE_ROLLOVER; - - if (gettimeofday(&tv, NULL)) - return -1; - - cld->cld_roll_time = (unsigned long)tv.tv_sec; + cld->cld_roll_time = gf_time(); cld->cld_finale = is_last; - return 0; } int @@ -1274,7 +1261,7 @@ changelog_rollover(void *data) while (1) { (void)pthread_testcancel(); - tv.tv_sec = time(NULL) + priv->rollover_time; + tv.tv_sec = gf_time() + priv->rollover_time; tv.tv_nsec = 0; ret = 0; /* Reset ret to zero */ @@ -1355,12 +1342,7 @@ changelog_rollover(void *data) if (priv->explicit_rollover == _gf_true) sleep(1); - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, NULL); - continue; - } + changelog_fill_rollover_data(&cld, _gf_false); _mask_cancellation(); diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 0d06d98c9e1..38fa7590c32 100644 --- 
a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -31,7 +31,7 @@ */ typedef struct changelog_log_data { /* rollover related */ - unsigned long cld_roll_time; + time_t cld_roll_time; /* reopen changelog? */ gf_boolean_t cld_finale; @@ -97,12 +97,6 @@ struct changelog_encoder { typedef struct changelog_time_slice { /** - * just in case we need nanosecond granularity some day. - * field is unused as of now (maybe we'd need it later). - */ - struct timeval tv_start; - - /** * version of changelog file, incremented each time changes * rollover. */ @@ -423,11 +417,11 @@ changelog_local_t * changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, int xtra_records, gf_boolean_t update_flag); int -changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale); +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); int changelog_open_journal(xlator_t *this, changelog_priv_t *priv); -int +void changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); int changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, @@ -451,12 +445,11 @@ changelog_fsync_thread(void *data); int changelog_forget(xlator_t *this, inode_t *inode); int -htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts, - char *buffer); +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); int -htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts); int -htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); /* Geo-Rep snapshot dependency changes */ void diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h index 4dd56b8ee97..cb0e16c85d8 100644 --- 
a/xlators/features/changelog/src/changelog-messages.h +++ b/xlators/features/changelog/src/changelog-messages.h @@ -59,12 +59,12 @@ GLFS_MSGID( CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, - CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, - CHANGELOG_MSG_GET_TIME_FAILURE, CHANGELOG_MSG_HTIME_FETCH_FAILED, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, @@ -123,8 +123,6 @@ GLFS_MSGID( #define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" #define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" #define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" -#define CHANGELOG_MSG_ROLLOVER_DATA_FILL_FAILED_STR \ - "failed to fill rollover data" #define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" #define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ "changelog local initialization failed" @@ -144,9 +142,7 @@ GLFS_MSGID( #define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" #define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ "Something went wrong in dict_get_str_boolean" -#define CHANGELOG_MSG_GET_TIME_FAILURE_STR "gettimeofday() failure" #define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" -#define CHANGELOG_MSG_HTIME_FETCH_FAILED_STR "unable to fetch htime" #define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" 
#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ "translator needs a single subvolume" diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 37916f40882..6a6e5af859e 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -2036,20 +2036,20 @@ notify(xlator_t *this, int event, void *data, ...) priv->notify_down = _gf_true; } UNLOCK(&priv->lock); - list_for_each_entry_safe(listener, next, &priv->rpc->listeners, - list) - { - if (listener->trans) { - rpc_transport_unref(listener->trans); + if (priv->rpc) { + list_for_each_entry_safe(listener, next, + &priv->rpc->listeners, list) + { + if (listener->trans) { + rpc_transport_unref(listener->trans); + } } + rpcsvc_destroy(priv->rpc); + priv->rpc = NULL; } CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); sys_unlink(sockfile); - if (priv->rpc) { - rpcsvc_destroy(priv->rpc); - priv->rpc = NULL; - } if (!cleanup_notify) default_notify(this, GF_EVENT_PARENT_DOWN, data); } @@ -2252,23 +2252,11 @@ static int changelog_init(xlator_t *this, changelog_priv_t *priv) { int i = 0; - int ret = -1; - struct timeval tv = { - 0, - }; + int ret = 0; changelog_log_data_t cld = { 0, }; - ret = gettimeofday(&tv, NULL); - if (ret) { - gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_GET_TIME_FAILURE, - NULL); - goto out; - } - - priv->slice.tv_start = tv; - priv->maps[CHANGELOG_TYPE_DATA] = "D "; priv->maps[CHANGELOG_TYPE_METADATA] = "M "; priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; @@ -2287,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv) * in case there was an encoding change. so... things are kept * simple here. 
*/ - ret = changelog_fill_rollover_data(&cld, _gf_false); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, _gf_false); ret = htime_open(this, priv, cld.cld_roll_time); /* call htime open with cld's rollover_time */ @@ -2470,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options) char csnap_dir[PATH_MAX] = { 0, }; - struct timeval tv = { - 0, - }; uint32_t timeout = 0; priv = this->private; @@ -2564,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options) out); if (active_now || active_earlier) { - ret = changelog_fill_rollover_data(&cld, !active_now); - if (ret) - goto out; + changelog_fill_rollover_data(&cld, !active_now); slice = &priv->slice; @@ -2585,13 +2566,7 @@ reconfigure(xlator_t *this, dict_t *options) if (!active_earlier) { gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, NULL); - if (gettimeofday(&tv, NULL)) { - gf_smsg(this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_FETCH_FAILED, NULL); - ret = -1; - goto out; - } - htime_create(this, priv, tv.tv_sec); + htime_create(this, priv, gf_time()); } ret = changelog_spawn_helper_threads(this, priv); } diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am index 0c3966c968b..e2a277e372b 100644 --- a/xlators/features/cloudsync/src/Makefile.am +++ b/xlators/features/cloudsync/src/Makefile.am @@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources) nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h BUILT_SOURCES = cloudsync-autogen-fops.h -cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS) +cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL) AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ 
-DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\" diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c index 7680260988b..23c3599825a 100644 --- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c @@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, int date_len = -1; int res_len = -1; - ctime = time(NULL); + ctime = gf_time(); gtime = gmtime(&ctime); date_len = strftime(httpdate, sizeof(httpdate), diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 4ece7ff6fc8..4abb2c73ce5 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -2104,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) worker_enqueue(this, stub); return 0; normal: - ret = dict_get_str(xattr_req, "link-count", &flag); + ret = dict_get_str_sizen(xattr_req, "link-count", &flag); if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xattr_req); @@ -2592,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...) 
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait for draining stub from queue before notify PARENT_DOWN */ diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c index 67fdd53cee2..56dee244281 100644 --- a/xlators/features/leases/src/leases-internal.c +++ b/xlators/features/leases/src/leases-internal.c @@ -897,7 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) } priv = this->private; - recall_time = time(NULL); + recall_time = gf_time(); list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, lease_id_list) { @@ -1367,7 +1367,7 @@ expired_recall_cleanup(void *data) gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread"); while (1) { - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&priv->mutex); { if (priv->fini) { diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 116aed68690..ab1eac68a53 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, if (plock->blocked) { bcount++; pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, - &plock->user_flock, -1, EAGAIN, NULL); + &plock->user_flock, -1, EINTR, NULL); - STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, + STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, &plock->user_flock, NULL); } else { diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 4c6b78c2372..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -460,11 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) INIT_LIST_HEAD(&pl_inode->blocked_calls); 
INIT_LIST_HEAD(&pl_inode->metalk_list); INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); gf_uuid_copy(pl_inode->gfid, inode->gfid); pl_inode->check_mlock_info = _gf_true; pl_inode->mlock_enforced = _gf_false; + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. */ + pl_inode->links = -2; + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); if (ret) { pthread_mutex_destroy(&pl_inode->mutex); @@ -600,13 +605,11 @@ static void __insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { if (lock->blocked) - gettimeofday(&lock->blkd_time, NULL); + lock->blkd_time = gf_time(); else - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add_tail(&lock->list, &pl_inode->ext_list); - - return; } /* Return true if the locks overlap, false otherwise */ @@ -1290,3 +1293,299 @@ pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) } return _gf_true; } + +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } + + if (loc->name == NULL) { + error = EINVAL; + goto done; + } + + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. 
*/ + goto done; + } + +done: + *pinode = inode; + + return error; +} + +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. */ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); + } + } + + return has_owners; +} + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) +{ + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. */ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. 
*/ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; + +unlock: + pthread_mutex_unlock(&pl_inode->mutex); + +done: + *ppl_inode = pl_inode; + + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (error < 0) { + inodelk_contention_notify(xl, contend); + } + + inode_unref(pl_inode->inode); + + return error; +} + +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; + + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); + + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) +{ + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } + + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} 
+ +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } +} + +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. 
+ */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 0916c299e84..281223bf3b8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -105,6 +105,15 @@ void __pl_inodelk_unref(pl_inode_lock_t *lock); void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend); + +void +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted); + +void grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend); @@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode); void __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); +void +inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, + struct timespec *now, + struct list_head *contend); + gf_boolean_t pl_does_monkey_want_stuck_lock(); @@ -218,4 +237,26 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); gf_boolean_t pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend); + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend); + +void +pl_inode_remove_wake(struct list_head *list); + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list); + +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); + #endif /* __COMMON_H__ */ diff 
--git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 0911659b437..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -121,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, pl_entry_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; @@ -129,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, * chance? Or just the locks we are attempting to acquire? */ if (names_conflict(candidate_lock->basename, requested_lock->basename)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -204,9 +202,9 @@ out: return revoke_lock; } -static gf_boolean_t -__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, - struct timespec *now) +void +entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -216,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -225,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -238,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -332,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, break; } } - if (__entrylk_needs_contention_notify(this, tmp, now)) { - list_add_tail(&tmp->contend, contend); - } + entrylk_contention_notify_check(this, tmp, now, contend); } } @@ -546,14 +542,10 @@ static int __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, pl_entry_lock_t *lock, int nonblock) { - struct timeval now; - if (nonblock) goto out; - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", @@ -614,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, } __pl_entrylk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->domain_list, &dom->entrylk_list); ret = 0; @@ -697,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + list_add_tail(&bl->blocked_locks, granted); } } - return; } /* Grants locks if possible which are blocked on a lock */ diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index e0e3b8f1f2d..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ 
b/xlators/features/locks/src/inodelk.c @@ -140,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { posix_locks_private_t *priv = NULL; - struct timeval curr; priv = this->private; /* Question: Should we just prune them all given the * chance? Or just the locks we are attempting to acquire? */ if (inodelk_conflict(candidate_lock, requested_lock)) { - gettimeofday(&curr, NULL); - *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec; + *lock_age_sec = gf_time() - candidate_lock->granted_time; if (*lock_age_sec > priv->revocation_secs) return _gf_true; } @@ -229,9 +227,9 @@ out: return revoke_lock; } -static gf_boolean_t -__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, - struct timespec *now) +void +inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { posix_locks_private_t *priv; int64_t elapsed; @@ -241,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, /* If this lock is in a list, it means that we are about to send a * notification for it, so no need to do anything else. 
*/ if (!list_empty(&lock->contend)) { - return _gf_false; + return; } elapsed = now->tv_sec; @@ -250,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, elapsed--; } if (elapsed < priv->notify_contention_delay) { - return _gf_false; + return; } /* All contention notifications will be sent outside of the locked @@ -263,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, lock->contention_time = *now; - return _gf_true; + list_add_tail(&lock->contend, contend); } void @@ -351,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, break; } } - if (__inodelk_needs_contention_notify(this, l, now)) { - list_add_tail(&l->contend, contend); - } + inodelk_contention_notify_check(this, l, now, contend); } } @@ -399,15 +395,11 @@ static int __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, int can_block) { - struct timeval now; - if (can_block == 0) { goto out; } - gettimeofday(&now, NULL); - - lock->blkd_time = now; + lock->blkd_time = gf_time(); list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); gf_msg_trace(this->name, 0, @@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, struct list_head *contend) { pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + int ret; - conf = __inodelk_grantable(this, dom, lock, now, contend); - if (conf) { - ret = __lock_blocked_add(this, dom, lock, can_block); - goto out; + ret = pl_inode_remove_inodelk(pl_inode, lock); + if (ret < 0) { + return ret; + } + if (ret == 0) { + conf = __inodelk_grantable(this, dom, lock, now, contend); + } + if ((ret > 0) || (conf != NULL)) { + return __lock_blocked_add(this, dom, lock, can_block); } /* To prevent blocked locks starvation, check if there are any blocked @@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, "starvation"); } - ret = __lock_blocked_add(this, dom, lock, 
can_block); - goto out; + return __lock_blocked_add(this, dom, lock, can_block); } __pl_inodelk_ref(lock); - gettimeofday(&lock->granted_time, NULL); + lock->granted_time = gf_time(); list_add(&lock->list, &dom->inodelk_list); - ret = 0; - -out: - return ret; + return 0; } /* Return true if the two inodelks have exactly same lock boundaries */ @@ -527,12 +520,11 @@ out: return conf; } -static void +void __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted, pl_dom_list_t *dom, struct timespec *now, struct list_head *contend) { - int bl_ret = 0; pl_inode_lock_t *bl = NULL; pl_inode_lock_t *tmp = NULL; @@ -545,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, { list_del_init(&bl->blocked_locks); - bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - if (bl_ret == 0) { - list_add(&bl->blocked_locks, granted); + if (bl->status != -EAGAIN) { + list_add_tail(&bl->blocked_locks, granted); } } - return; } -/* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct timespec *now, - struct list_head *contend) +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - struct list_head granted; pl_inode_lock_t *lock; pl_inode_lock_t *tmp; + int32_t op_ret; + int32_t op_errno; - INIT_LIST_HEAD(&granted); - - pthread_mutex_lock(&pl_inode->mutex); - { - __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, - contend); - } - pthread_mutex_unlock(&pl_inode->mutex); - - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { - gf_log(this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", lock->client_pid, - lkowner_utoa(&lock->owner), lock->user_flock.l_start, - lock->user_flock.l_len); - + if (lock->status == 0) { + op_ret = 0; + op_errno = 0; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => Granted", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + } else { + op_ret = -1; + op_errno = -lock->status; + } pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, - 0, 0, lock->volume); + op_ret, op_errno, lock->volume); - STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); + STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); lock->frame = NULL; } pthread_mutex_lock(&pl_inode->mutex); { - list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { list_del_init(&lock->blocked_locks); __pl_inodelk_unref(lock); @@ -599,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock(&pl_inode->mutex); } +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + struct list_head granted; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(this, pl_inode, &granted); +} + static void pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { @@ -660,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) * and blocked lists, then this means that a parallel * unlock on another inodelk (L2 say) may have 'granted' * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the + * __grant_blocked_inode_locks() (although using the * 
'blocked_locks' member). In that case, the cleanup * codepath must try and grant other overlapping * blocked inodelks from other clients, now that L1 is @@ -745,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, gf_boolean_t need_inode_unref = _gf_false; struct list_head *pcontend = NULL; struct list_head contend; + struct list_head wake; struct timespec now = {}; short fl_type; @@ -796,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, timespec_now(&now); } + INIT_LIST_HEAD(&wake); + if (ctx) pthread_mutex_lock(&ctx->lock); pthread_mutex_lock(&pl_inode->mutex); @@ -818,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, lkowner_utoa(&lock->owner), lock->user_flock.l_start, lock->user_flock.l_len); - if (can_block) + if (can_block) { unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; + } } - - if (ctx && (!ret || can_block)) + /* For all but the case where a non-blocking lock attempt fails + * with -EAGAIN, the extra ref taken at the start of this function + * must be negated. 
*/ + need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); + if (ctx && !need_inode_unref) { list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } } else { /* Irrespective of whether unlock succeeds or not, * the extra inode ref that was done at the start of @@ -847,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, list_del_init(&retlock->client_list); __pl_inodelk_unref(retlock); + pl_inode_remove_unlocked(this, pl_inode, &wake); + ret = 0; } out: @@ -857,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, if (ctx) pthread_mutex_unlock(&ctx->lock); + pl_inode_remove_wake(&wake); + /* The following (extra) unref corresponds to the ref that * was done at the time the lock was granted. */ @@ -1037,10 +1051,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, inode); if (ret < 0) { - if ((can_block) && (F_UNLCK != lock_type)) { - goto out; + if (ret == -EAGAIN) { + if (can_block && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + } else { + gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); } - gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); op_errno = -ret; goto unwind; } diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 3305350afb1..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -43,9 +43,8 @@ struct __posix_lock { fd_t *fd; call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ /* These two together serve to uniquely identify each process across nodes */ @@ -85,9 +84,9 @@ struct __pl_inode_lock { call_frame_t *frame; - struct timeval 
blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -102,6 +101,9 @@ struct __pl_inode_lock { struct list_head client_list; /* list of all locks from a client */ short fl_type; + + int32_t status; /* Error code when we try to grant a lock in blocked + state */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -136,9 +138,9 @@ struct __entry_lock { const char *basename; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval - granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ + /*last time at which lock contention was detected and notified*/ struct timespec contention_time; @@ -164,13 +166,14 @@ struct __pl_inode { struct list_head rw_list; /* list of waiting r/w requests */ struct list_head reservelk_list; /* list of reservelks */ struct list_head blocked_reservelks; /* list of blocked reservelks */ - struct list_head - blocked_calls; /* List of blocked lock calls while a reserve is held*/ - struct list_head metalk_list; /* Meta lock list */ - /* This is to store the incoming lock - requests while meta lock is enabled */ - struct list_head queued_locks; - int mandatory; /* if mandatory locking is enabled */ + struct list_head blocked_calls; /* List of blocked lock calls while a + reserve is held*/ + struct list_head metalk_list; /* Meta lock list */ + struct list_head queued_locks; /* This is to store the incoming lock + requests while meta lock is enabled */ + struct list_head waiting; /* List of pending fops waiting to 
unlink/rmdir + the inode. */ + int mandatory; /* if mandatory locking is enabled */ inode_t *refkeeper; /* hold refs on an inode while locks are held to prevent pruning */ @@ -197,7 +200,13 @@ struct __pl_inode { */ int fop_wind_count; pthread_cond_t check_fop_wind_count; + gf_boolean_t track_fop_wind_count; + + int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ + gf_boolean_t removed; /* The inode has been deleted. */ }; typedef struct __pl_inode pl_inode_t; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 8b57627addf..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -148,6 +148,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); } \ } while (0) +#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ + _args...) \ + ({ \ + struct list_head contend; \ + pl_inode_t *__pl_inode; \ + call_stub_t *__stub; \ + int32_t __error; \ + INIT_LIST_HEAD(&contend); \ + __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? 
_loc2 : _loc1, \ + &__pl_inode, &contend); \ + if (__error < 0) { \ + __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ + __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ + &contend); \ + } else if (__error == 0) { \ + PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ + _loc2); \ + STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ + FIRST_CHILD(_xl)->fops->_fop, ##_args); \ + } \ + __error; \ + }) + gf_boolean_t pl_has_xdata_requests(dict_t *xdata) { @@ -471,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, char *save_ptr = NULL; tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + strtok_r(tmp_key, ":", &save_ptr); if (!*save_ptr) { if (tmp_key) @@ -2962,11 +2988,85 @@ out: return ret; } +static int32_t +pl_request_link_count(dict_t **pxdata) +{ + dict_t *xdata; + + xdata = *pxdata; + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + return ENOMEM; + } + } else { + dict_ref(xdata); + } + + if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { + dict_unref(xdata); + return ENOMEM; + } + + *pxdata = xdata; + + return 0; +} + +static int32_t +pl_check_link_count(dict_t *xdata) +{ + int32_t count; + + /* In case we are unable to read the link count from xdata, we take a + * conservative approach and return -2, which will prevent the inode from + * being considered deleted. In fact it will cause link tracking for this + * inode to be disabled completely to avoid races. 
*/ + + if (xdata == NULL) { + return -2; + } + + if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { + return -2; + } + + return count; +} + int32_t pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + pl_inode_t *pl_inode; + + if (op_ret >= 0) { + pl_inode = pl_inode_get(this, inode, NULL); + if (pl_inode == NULL) { + PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + return 0; + } + + pthread_mutex_lock(&pl_inode->mutex); + + /* We only update the link count if we previously didn't know it. + * Doing it always can lead to races since lookup is not executed + * atomically most of the times. */ + if (pl_inode->links == -2) { + pl_inode->links = pl_check_link_count(xdata); + if (buf->ia_type == IA_IFDIR) { + /* Directories have at least 2 links. To avoid special handling + * for directories, we simply decrement the value here to make + * them equivalent to regular files. 
*/ + pl_inode->links--; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, postparent); return 0; @@ -2975,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); + int32_t error; + + error = pl_request_link_count(&xdata); + if (error == 0) { + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + } else { + STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); + } return 0; } @@ -3502,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner, time_t *blkd_time, gf_boolean_t active) { char *type_str = NULL; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; @@ -3556,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode) { pl_dom_list_t *dom = NULL; pl_entry_lock_t *lock = NULL; - char blocked[256] = { + char blocked[GF_TIMESTR_SIZE] = { 0, }; - char granted[256] = { + char granted[GF_TIMESTR_SIZE] = { 0, }; int count = 0; @@ -3579,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->entrylk_list, domain_list) { - gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec, + gf_time_fmt(granted, sizeof(granted), lock->granted_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); - if (lock->blkd_time.tv_sec == 0) { + if (lock->blkd_time == 0) { snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", @@ -3590,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode) lkowner_utoa(&lock->owner), lock->client, lock->connection_id, granted); } else { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" @@ -3607,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode) list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec, + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, gf_timefmt_FT); gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); @@ -3659,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode) SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - _gf_true); + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3673,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode) count); SET_FLOCK_PID(&lock->user_flock, lock); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->connection_id, 0, - &lock->blkd_time.tv_sec, _gf_false); + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3707,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode) gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, lock->blocked ? "BLOCKED" : "ACTIVE"); pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, - lock->client, lock->client_uid, &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - (lock->blocked) ? _gf_false : _gf_true); + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? 
_gf_false : _gf_true); gf_proc_dump_write(key, "%s", tmp); count++; @@ -3793,6 +3899,10 @@ unlock: gf_proc_dump_write("posixlk-count", "%d", count); __dump_posixlks(pl_inode); } + + gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); + gf_proc_dump_write("removed", "%u", pl_inode->removed); } pthread_mutex_unlock(&pl_inode->mutex); @@ -4138,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, struct iatt *postoldparent, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, postoldparent, prenewparent, postnewparent, xdata); + return 0; } @@ -4147,10 +4260,15 @@ int32_t pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); + int32_t error; + + error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, + pl_rename_cbk, oldloc, newloc, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, + NULL, NULL); + } - STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); return 0; } @@ -4274,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4283,9 +4404,14 @@ int32_t pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + int32_t error; + + error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, + pl_unlink_cbk, loc, xflag, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4352,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, postparent, xdata); + return 0; } @@ -4361,9 +4490,14 @@ int pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + int32_t error; + + error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, + pl_rmdir_cbk, loc, xflags, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); + } + return 0; } @@ -4393,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + pl_inode_t *pl_inode = (pl_inode_t *)cookie; + + if (op_ret >= 0) { + pthread_mutex_lock(&pl_inode->mutex); + + /* TODO: can happen pl_inode->links == 0 ? 
*/ + if (pl_inode->links >= 0) { + pl_inode->links++; + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, preparent, postparent, xdata); return 0; @@ -4402,9 +4549,18 @@ int pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { + pl_inode_t *pl_inode; + + pl_inode = pl_inode_get(this, oldloc->inode, NULL); + if (pl_inode == NULL) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); - STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); return 0; } diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/metadisp/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = 
$(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * + * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. + */ + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char gfid_buf[GFID_STR_LEN + 1] = { + 0, + }; + char *path = NULL; + + GF_VALIDATE_OR_GOTO("metadisp", src_loc, out); + GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out); + + loc_copy(dst_loc, src_loc); + memcpy(dst_loc->pargfid, root, sizeof(root)); + GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke + // whatever loc_copy gave us + + uuid_utoa_r(gfid, gfid_buf); + + path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char), + gf_common_mt_char); // freed via loc_wipe + + path[0] = '/'; + strncpy(path + 1, gfid_buf, GFID_STR_LEN); + path[GFID_STR_LEN] = 0; + dst_loc->path = path; + if (src_loc->name) + dst_loc->name = strrchr(dst_loc->path, '/'); + if (dst_loc->name) + dst_loc->name++; + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c new file mode 100644 index 00000000000..4385b7dd5b7 --- /dev/null +++ b/xlators/features/metadisp/src/fops-tmpl.c @@ -0,0 +1,10 @@ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/xlator.h> 
+#include "metadisp.h" +#include "metadisp-fops.h" + +#pragma generate diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py new file mode 100644 index 00000000000..8b5e120fdec --- /dev/null +++ b/xlators/features/metadisp/src/gen-fops.py @@ -0,0 +1,160 @@ +#!/usr/bin/python + +import sys +from generator import fop_subs, generate + +FN_METADATA_CHILD_GENERIC = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ metadata"); + STACK_WIND (frame, default_@NAME@_cbk, + METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_GENERIC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ generic"); + STACK_WIND (frame, default_@NAME@_cbk, + DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATAFD_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ datafd"); + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATALOC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ dataloc"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} +""" + +FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@," + +skipped = [ + "readdir", + "readdirp", + "lookup", + "fsync", + "stat", + "open", + "create", + "unlink", + "setattr", + # TODO: implement "inodelk", +] + + +def 
gen_fops(): + done = skipped + + # + # these are fops that wind to the DATA_CHILD + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "writev", + "readv", + "ftruncate", + "zerofill", + "discard", + "seek", + "fstat", + ]: + done = done + [name] + print(generate(FN_DATAFD_TEMPLATE, name, fop_subs)) + + for name in ["truncate"]: + done = done + [name] + print(generate(FN_DATALOC_TEMPLATE, name, fop_subs)) + + # these are fops that operate solely on dentries, folders, + # or extended attributes. Therefore, they must always + # wind to METADATA_CHILD and should never perform + # any path rewriting + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "mkdir", + "symlink", + "link", + "rename", + "mknod", + "opendir", + # "readdir, # special-cased + # "readdirp, # special-cased + "fsyncdir", + # "setattr", # special-cased + "readlink", + "fentrylk", + "access", + # TODO: these wind to both, + # data for backend-attributes and metadata for the rest + "xattrop", + "setxattr", + "getxattr", + "removexattr", + "fgetxattr", + "fsetxattr", + "fremovexattr", + ]: + + done = done + [name] + print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs)) + + print("struct xlator_fops fops = {") + for name in done: + print(generate(FOPS_LINE_TEMPLATE, name, fop_subs)) + + print("};") + + +for l in open(sys.argv[1], "r").readlines(): + if l.find("#pragma generate") != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_fops() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c new file mode 100644 index 00000000000..f8c9798dd59 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-create.c @@ -0,0 +1,101 @@ +#include "metadisp.h" +#include 
<glusterfs/call-stub.h> + +/** + * Create, like stat, is a two-step process. We send a create + * to the METADATA_CHILD, then send another create to the DATA_CHILD. + * + * We do the metadata child first to ensure that the ACLs are enforced. + */ + +int32_t +metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + // create the backend data inode + STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +int32_t +metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = cookie; + if (op_ret != 0) { + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; + } + + if (stub == NULL) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + return 0; + } + + call_resume(stub); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("."); + + loc_t backend_loc = { + 0, + }; + call_stub_t *stub = NULL; + uuid_t *gfid_req = NULL; + + RESOLVE_GFID_REQ(xdata, gfid_req, out); + + if 
(build_backend_loc(*gfid_req, loc, &backend_loc)) { + goto unwind; + } + + frame->local = loc; + + stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags, + mode, umask, fd, xdata); + + STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h new file mode 100644 index 00000000000..56dd427cf34 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fops.h @@ -0,0 +1,51 @@ +#ifndef GF_METADISP_FOPS_H_ +#define GF_METADISP_FOPS_H_ + +#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> +#include <glusterfs/glusterfs.h> + +#include <sys/types.h> + +/* fops in here are defined in their own file. Every other fop is just defined + * inline of fops.c */ + +int +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); + +int +metadisp_unlink(call_frame_t *frame, xlator_t 
*this, loc_t *loc, int xflag, + dict_t *xdata); + +int +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +#endif diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c new file mode 100644 index 00000000000..2e46fa84eac --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fsync.c @@ -0,0 +1,54 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} + +int32_t +metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata); + STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c new file mode 100644 index 00000000000..27d90c9f746 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-lookup.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Lookup, like stat, is a two-step process 
for grabbing the metadata details + * as well as the data details. + */ + +int32_t +metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + METADISP_TRACE("backend_lookup_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +} + +int32_t +metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("backend_lookup_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->lookup, &backend_loc, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + + // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + METADISP_TRACE("lookup"); + call_stub_t *stub = NULL; + stub = fop_lookup_stub(frame, 
metadisp_backend_lookup_resume, loc, xdata); + STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c new file mode 100644 index 00000000000..64814afe636 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-open.c @@ -0,0 +1,70 @@ +#include <glusterfs/call-stub.h> +#include "metadisp.h" + +int32_t +metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("got open results %d %d", op_ret, op_errno); + + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (!stub) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this), + DATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd, + xdata); + STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-readdir.c 
b/xlators/features/metadisp/src/metadisp-readdir.c new file mode 100644 index 00000000000..5f840b1e88f --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-readdir.c @@ -0,0 +1,65 @@ +#include "metadisp.h" + +/** + * With a change to the posix xlator, readdir and readdirp are shockingly + * simple. + * + * The issue with separating the backend data of the files + * with the metadata is that readdirs must now read from multiple sources + * to coalesce the directory entries. + * + * The way we do this is to tell the METADATA_CHILD that when it's + * running readdirp, each file entry should have a stat wound to + * 'stat-source-of-truth'. + * + * see metadisp_stat for how it handles winds _from_posix. + */ + +int32_t +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + /* + * Always use readdirp, even if the original was readdir. Why? Because NFS. + * There are multiple translations between Gluster, UNIX, and NFS stat + * structures in that path. One of them uses the type etc. from the stat + * structure, which is only filled in by readdirp. If we use readdir, the + * entries do actually go all the way back to the client and are visible in + * getdents, but then the readdir throws them away because of the + * uninitialized type. + */ + GF_UNUSED int32_t ret; + if (!xdata) { + xdata = dict_new(); + } + + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! 
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +} + +int32_t +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + if (!xdata) { + xdata = dict_new(); + } + GF_UNUSED int32_t ret; + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c new file mode 100644 index 00000000000..6991cf644f3 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-setattr.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid, + xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL); + 
return 0; +} + +int32_t +metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(statpost->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + METADISP_TRACE("setattr"); + call_stub_t *stub = NULL; + stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf, + valid, xdata); + STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c new file mode 100644 index 00000000000..b06d0dbcddd --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-stat.c @@ -0,0 +1,124 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The stat flow in METADISP is complicated because we must + * do ensure a few things: + * 1. stat, on the path within the metadata layer, + * MUST get the backend FD of the data layer. + * --- we wind to the metadata layer, then the data layer. + * + * 2. the metadata layer MUST be able to ask the data + * layer for stat information. + * --- this is 'syncop-internal-from-posix' + * + * 3. when the metadata exists BUT the data is missing, + * we MUST mark the backend file as bad and heal it. 
+ */ + +int32_t +metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno); + if (op_errno == ENOENT) { + STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL); + return 0; + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("winding stat to path %s", loc->path); + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("bad object, sending EUCLEAN"); + STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL); + return 0; + } + + STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + METADISP_TRACE("got stat results %d %d", op_ret, op_errno); + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // only use the stub for the files + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t ret = 0; + loc_t backend_loc = { + 0, + }; + METADISP_FILTER_ROOT(stat, loc, xdata); + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) { + // if we've just been sent a stat from posix, then we know + // that we must send 
down a stat for a file to the second child. + // + // that means we can skip the stat for the first child and just + // send to the data disk. + METADISP_TRACE("got syncop-internal-from-posix"); + STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->stat, &backend_loc, xdata); + return 0; + } + + // we do not know if the request is for a file, folder, etc. wind + // to first child to find out. + stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata); + METADISP_TRACE("winding stat to first child %s", loc->path); + STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->stat, loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c new file mode 100644 index 00000000000..1f6a8eb35ce --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-unlink.c @@ -0,0 +1,160 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The unlink flow in metadisp is complicated because we must + * do ensure that UNLINK causes both the metadata objects + * to get removed and the data objects to get removed. + */ + +int32_t +metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) +{ + METADISP_TRACE("winding backend unlink to path %s", loc->path); + STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE(". 
%d %d", op_ret, op_errno); + + int ret = 0; + call_stub_t *stub = NULL; + int nlink = 0; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret != 0) { + op_errno = EINVAL; + op_ret = -1; + goto unwind; + } + METADISP_TRACE("frontend hardlink count %d %d", ret, nlink); + if (nlink > 1) { + goto unwind; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + call_stub_t *stub = NULL; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // fail fast on empty gfid so we don't loop forever + if (gf_uuid_is_null(buf->ia_gfid)) { + op_ret = -1; + op_errno = ENODATA; + goto unwind; + } + + // fill gfid since the stub is incomplete + memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t)); + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("winding lookup for unlink to path %s", loc->path); + + // loop back to ourselves after a lookup + stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata); + 
STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub, + METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + // + // ensure we get the link count on the unlink response, so we can + // account for hardlinks before winding to the backend. + // NOTE: + // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation + // is needed to ensure that multiple requests will work in the same + // xlator stack. + // + if (!xdata) { + xdata = dict_new(); + } + dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + + METADISP_TRACE("winding frontend unlink to path %s", loc->path); + stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag, + xdata); + + STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c new file mode 100644 index 00000000000..3c8f150cebc --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.c @@ -0,0 +1,46 @@ +#include <glusterfs/call-stub.h> + +#include "metadisp.h" +#include "metadisp-fops.h" + +int32_t +init(xlator_t *this) +{ + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with children. exiting"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +/* defined in fops.c */ +struct xlator_fops fops; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .fops = &fops, + .cbks = &cbks, + .options = options, + .op_version = {1}, + .identifier = "metadisp", + .category = GF_EXPERIMENTAL, +}; diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h new file mode 100644 index 00000000000..c8fd7a13c04 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef GF_METADISP_H_ +#define GF_METADISP_H_ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#define METADATA_CHILD(_this) FIRST_CHILD(_this) +#define DATA_CHILD(_this) SECOND_CHILD(_this) + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc); + +#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args) + +#define METADISP_FILTER_ROOT(_op, _args...) \ + if (strcmp(loc->path, "/") == 0) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) 
\ + if (__is_root_gfid(_gfid)) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \ + VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \ + _lbl) + +#endif /* __TEMPLATE_H__ */ diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c index 097439d86d6..480d64ade27 100644 --- a/xlators/features/quota/src/quota-enforcer-client.c +++ b/xlators/features/quota/src/quota-enforcer-client.c @@ -32,12 +32,6 @@ #include <malloc.h> #endif -#ifdef HAVE_MALLOC_STATS -#ifdef DEBUG -#include <mcheck.h> -#endif -#endif - #include "quota.h" #include "quota-messages.h" diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 73c008a2c00..18df9ae6d19 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -586,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -626,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, * loop of validation and checking * limit when timeout is zero. 
*/ - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, &tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -644,27 +640,10 @@ unwind: return 0; } -static uint64_t -quota_time_elapsed(struct timeval *now, struct timeval *then) -{ - return (now->tv_sec - then->tv_sec); -} - -int32_t -quota_timeout(struct timeval *tv, int32_t timeout) +static inline gf_boolean_t +quota_timeout(time_t t, uint32_t timeout) { - struct timeval now = { - 0, - }; - int32_t timed_out = 0; - - gettimeofday(&now, NULL); - - if (quota_time_elapsed(&now, tv) >= timeout) { - timed_out = 1; - } - - return timed_out; + return (gf_time() - t) >= timeout; } /* Return: 1 if new entry added @@ -1128,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if ((object_aggr_count) > ctx->object_hard_lim) { hard_limit_exceeded = 1; @@ -1195,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, timeout = priv->hard_timeout; } - if (!just_validated && quota_timeout(&ctx->tv, timeout)) { + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { need_validate = 1; } else if (wouldbe_size >= ctx->hard_lim) { hard_limit_exceeded = 1; @@ -4314,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, quota_meta_t size = { 0, }; - struct timeval tv = { - 0, - }; local = frame->local; @@ -4348,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; } - gettimeofday(&tv, NULL); LOCK(&ctx->lock); { ctx->size = size.size; + ctx->validate_time = gf_time(); ctx->file_count = size.file_count; ctx->dir_count = size.dir_count; - memcpy(&ctx->tv, 
&tv, sizeof(struct timeval)); } UNLOCK(&ctx->lock); @@ -4873,7 +4848,7 @@ off: void quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, - char **path, struct timeval *cur_time) + char **path, time_t *cur_time) { xlator_t *this = THIS; @@ -4892,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, if (!(*path)) *path = uuid_utoa(inode->gfid); - gettimeofday(cur_time, NULL); + *cur_time = gf_time(); } /* Logs if @@ -4903,9 +4878,7 @@ void quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, int64_t delta) { - struct timeval cur_time = { - 0, - }; + time_t cur_time = 0; char *usage_str = NULL; char *path = NULL; int64_t cur_size = 0; @@ -4931,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } /* Usage is above soft limit */ else if (cur_size > ctx->soft_lim && - quota_timeout(&ctx->prev_log, priv->log_timeout)) { + quota_timeout(ctx->prev_log_time, priv->log_timeout)) { quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, @@ -4947,7 +4920,7 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, "path=%s", usage_str, priv->volume_uuid, path); - ctx->prev_log = cur_time; + ctx->prev_log_time = cur_time; } if (path) @@ -5184,9 +5157,9 @@ quota_priv_dump(xlator_t *this) if (ret) goto out; else { - gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout); - gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout); - gf_proc_dump_write("alert-time", "%d", priv->log_timeout); + gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout); + gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout); + gf_proc_dump_write("alert-time", "%u", priv->log_timeout); gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); gf_proc_dump_write("statfs", "%d", 
priv->consider_statfs); gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 8a3dc7a77f5..0395d78c9ef 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -153,8 +153,8 @@ struct quota_inode_ctx { int64_t object_soft_lim; struct iatt buf; struct list_head parents; - struct timeval tv; - struct timeval prev_log; + time_t validate_time; + time_t prev_log_time; gf_boolean_t ancestry_built; gf_lock_t lock; }; @@ -199,6 +199,7 @@ struct quota_local { typedef struct quota_local quota_local_t; struct quota_priv { + /* FIXME: consider time_t for timeouts. */ uint32_t soft_timeout; uint32_t hard_timeout; uint32_t log_timeout; diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c index 25fbd4aa748..df45f2a940b 100644 --- a/xlators/features/read-only/src/worm-helper.c +++ b/xlators/features/read-only/src/worm-helper.c @@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) GF_VALIDATE_OR_GOTO("worm", this, out); GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); - start_time = time(NULL); + start_time = gf_time(); dict = dict_new(); if (!dict) { gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); @@ -94,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, if (ret) goto out; stbuf->ia_mtime = stpre.ia_mtime; - stbuf->ia_atime = time(NULL) + retention_state->ret_period; + stbuf->ia_atime = gf_time() + retention_state->ret_period; if (fop_with_fd) ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, @@ -286,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, { int op_errno = EROFS; int ret = -1; + time_t now = 0; uint64_t com_period = 0; uint64_t start_time = 0; dict_t *dict = NULL; @@ -337,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, goto 
out; } - if (ret == -1 && (time(NULL) - start_time) >= com_period) { - if ((time(NULL) - stbuf.ia_mtime) >= com_period) { + now = gf_time(); + + if (ret == -1 && (now - start_time) >= com_period) { + if ((now - stbuf.ia_mtime) >= com_period) { ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state, &stbuf); if (ret) { @@ -352,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, op_errno = 0; goto out; } - } else if (ret == -1 && (time(NULL) - start_time) < com_period) { + } else if (ret == -1 && (now - start_time) < com_period) { op_errno = 0; goto out; - } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) { + } else if (reten_state.retain && ((now >= stbuf.ia_atime))) { gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf); } if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable && diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index e4042117427..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -513,6 +513,9 @@ shard_local_wipe(shard_local_t *local) loc_wipe(&local->int_entrylk.loc); loc_wipe(&local->newloc); + if (local->name) + GF_FREE(local->name); + if (local->int_entrylk.basename) GF_FREE(local->int_entrylk.basename); if (local->fd) @@ -1001,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1017,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = 
local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exists + * and directly issue mknod to crete shards. + * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -1659,26 +1692,24 @@ err: } int -shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) { int ret = -1; int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; + shard_local_t *local = frame->local; shard_inode_ctx_t ctx = { 0, }; - local = frame->local; - if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, op_errno, 
SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" " failed : %s", - loc_gfid_utoa(&(local->loc))); + uuid_utoa(inode->gfid)); local->op_ret = op_ret; local->op_errno = op_errno; goto unwind; @@ -1712,18 +1743,57 @@ unwind: } int -shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) +shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + shard_local_t *local = frame->local; + + shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, + op_errno, buf, xdata); + return 0; +} + +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + /* In case of op_ret < 0, inode passed to this function will be NULL + ex: in case of op_errno = ENOENT. So refer prefilled inode data + which is part of local. + Note: Reassigning/overriding the inode passed to this cbk with inode + which is part of *struct shard_local_t* won't cause any issue as + both inodes have same reference/address as of the inode passed */ + inode = ((shard_local_t *)frame->local)->loc.inode; + + shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, + buf, xdata); + return 0; +} + +/* This function decides whether to make file based lookup or + * fd based lookup (fstat) depending on the 3rd and 4th arg. + * If fd != NULL and loc == NULL then call is for fstat + * If fd == NULL and loc != NULL then call is for file based + * lookup. Please pass args based on the requirement. 
+ */ +int +shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, shard_post_fop_handler_t handler) { int ret = -1; + inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; gf_boolean_t need_refresh = _gf_false; local = frame->local; local->handler = handler; + inode = fd ? fd->inode : loc->inode; - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, &need_refresh); /* By this time, inode ctx should have been created either in create, * mknod, readdirp or lookup. If not it is a bug! @@ -1732,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); + uuid_utoa(inode->gfid)); goto out; } @@ -1743,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, goto out; } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + if (fd) + STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xattr_req); + else + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); dict_unref(xattr_req); return 0; @@ -2015,8 +2089,8 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) */ if (!inode) { gf_msg_debug(this->name, 0, - "Last shard to be truncated absent in backend: %d of " - "gfid %s. Directly proceeding to update file size", + "Last shard to be truncated absent in backend: %" PRIu64 + " of gfid %s. 
Directly proceeding to update file size", local->first_block, uuid_utoa(local->loc.inode->gfid)); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); @@ -2399,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2419,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2429,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -2574,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; @@ -2723,8 +2798,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, local->resolver_base_inode = loc->inode; GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_truncate_handler); return 0; err: @@ -2779,8 +2854,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, local->resolver_base_inode = fd->inode; GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_truncate_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); @@ -2924,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!local->xattr_req) goto err; - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_link_handler); return 0; err: shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); @@ -4254,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) switch (local->fop) { case GF_FOP_UNLINK: case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); + shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, + shard_post_lookup_base_shard_rm_handler); break; default: gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -4510,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->block_size) { local->tmp_loc.inode = inode_new(this->itable); gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - 
shard_post_rename_lookup_handler); + shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, + shard_post_rename_lookup_handler); } else { shard_rename_cbk(frame, this); } @@ -5150,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = local->loc.inode; local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), @@ -5246,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_readv_handler); return 0; err: shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); @@ -5610,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } @@ -5640,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, local->last_block = get_highest_block(local->offset, local->total_size, local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) { @@ -5648,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, } gf_msg_trace(this->name, 0, - "%s: gfid=%s first_block=%" PRIu32 + "%s: gfid=%s first_block=%" PRIu64 " " - "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 + "last_block=%" PRIu64 " 
num_blocks=%" PRIu64 " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), @@ -6045,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_fsync_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); @@ -6238,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } int32_t -shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, + char *key) { - int op_errno = EINVAL; + int ret = 0; + struct iatt *tmpbuf = NULL; + struct iatt *stbuf = NULL; + data_t *data = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + if (!xdata) + return 0; + + data = dict_get(xdata, key); + if (!data) + return 0; + + tmpbuf = data_to_iatt(data, key); + stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); + if (stbuf == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; } + *stbuf = *tmpbuf; + stbuf->ia_size = local->prebuf.ia_size; + stbuf->ia_blocks = local->prebuf.ia_blocks; + ret = dict_set_iatt(xdata, key, stbuf, false); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + return 0; - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +err: + GF_FREE(stbuf); + return -1; +} + +int32_t +shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int ret 
= -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, local->fd, + local->name, local->xattr_req); + else + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &local->loc, + local->name, local->xattr_req); + return 0; +} + +int32_t +shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? 
loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + /* If shard's special xattrs are attempted to be removed, + * fail the fop with EPERM (except if the client is gsyncd). + */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); } + /* Repeat the same check for bulk-removexattr */ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + + if (name) { + local->name = gf_strdup(name); + if (!local->name) + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + 
shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_remove_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, + xdata); + return 0; +} + +int32_t +shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, + xdata); return 0; } @@ -6360,38 +6601,164 @@ out: } int32_t -shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t 
-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, local->fd, + local->xattr_req, local->flags, local->xattr_rsp); + else + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, + local->xattr_req, local->flags, local->xattr_rsp); + return 0; +} + +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + /* Sharded or not, if shard's special xattrs are attempted to be set, + * fail the fop with EPERM (except if the client is gsyncd. 
+ */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + local->flags = flags; + /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict + * and the xdata dict + */ + if (dict) + local->xattr_req = dict_ref(dict); + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_set_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; +} + +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t 
flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); return 0; } @@ -6654,8 +7021,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_failure_unwind(fop, frame, -1, ENOMEM); diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 04abd62c21c..4fe181b64d5 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, typedef struct shard_local { int op_ret; int op_errno; - int first_block; - int last_block; - int num_blocks; + uint64_t first_block; + uint64_t last_block; + uint64_t num_blocks; int call_count; int eexist_count; int create_count; @@ -318,6 +318,7 @@ typedef struct shard_local { uint32_t deletion_rate; gf_boolean_t cleanup_required; uuid_t base_gfid; + char *name; } shard_local_t; typedef struct shard_inode_ctx { diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index f44b11c6872..7d09cba3e9c 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -212,11 +212,11 @@ void append_time_stamp(char *name, size_t name_size) { int i; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; - gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS); + gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS); /* removing white spaces in timestamp */ for (i = 0; i < strlen(timestr); i++) { diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 
978825f6b56..c641bd6f432 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -316,7 +316,7 @@ upcall_reaper_thread(void *data) priv = this->private; GF_ASSERT(priv); - time_now = time(NULL); + time_now = gf_time(); while (!priv->fini) { list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, inode_ctx_list) @@ -344,7 +344,7 @@ upcall_reaper_thread(void *data) /* don't do a very busy loop */ timeout = get_cache_invalidation_timeout(this); sleep(timeout / 2); - time_now = time(NULL); + time_now = gf_time(); } return NULL; @@ -533,7 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, goto out; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, @@ -670,13 +670,13 @@ upcall_cache_forget(xlator_t *this, inode_t *inode, return; } - time_now = time(NULL); + time_now = gf_time(); pthread_mutex_lock(&up_inode_ctx->client_list_lock); { list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list, client_list) { - /* Set the access time to time(NULL) + /* Set the access time to gf_time() * to send notify */ up_client_entry->access_time = time_now; diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c index 8b3d7b2f2f2..cb54f547468 100644 --- a/xlators/meta/src/meta-helpers.c +++ b/xlators/meta/src/meta-helpers.c @@ -182,14 +182,15 @@ meta_uuid_copy(uuid_t dst, uuid_t src) } static void -default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type) +default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type, + gf_boolean_t is_tunable) { struct timeval tv = {}; iatt->ia_type = type; switch (type) { case IA_IFDIR: - iatt->ia_prot = ia_prot_from_st_mode(0755); + iatt->ia_prot = ia_prot_from_st_mode(0555); iatt->ia_nlink = 2; break; case IA_IFLNK: @@ -197,7 +198,7 @@ default_meta_iatt_fill(struct iatt *iatt, 
inode_t *inode, ia_type_t type) iatt->ia_nlink = 1; break; default: - iatt->ia_prot = ia_prot_from_st_mode(0644); + iatt->ia_prot = ia_prot_from_st_mode(is_tunable ? 0644 : 0444); iatt->ia_nlink = 1; break; } @@ -225,7 +226,7 @@ meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type) return; if (!ops->iatt_fill) - default_meta_iatt_fill(iatt, inode, type); + default_meta_iatt_fill(iatt, inode, type, !!ops->file_write); else ops->iatt_fill(THIS, inode, iatt); return; diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index eaa61c435e5..685beb42d27 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -25,13 +25,14 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \ - glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \ + $(CONTRIBDIR)/mount/mntent.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/libglusterd/src/libglusterd.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ - $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) + $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD) noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ @@ -46,7 +47,8 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ glusterd-shd-svc.h glusterd-shd-svc-helper.h \ glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ - $(CONTRIBDIR)/userspace-rcu/rculist-extra.h + 
$(CONTRIBDIR)/userspace-rcu/rculist-extra.h \ + $(CONTRIBDIR)/mount/mntent_compat.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 3af2867b82a..e56cd0e6c74 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -21,7 +21,6 @@ #include "glusterd-messages.h" #include "glusterd-server-quorum.h" #include <glusterfs/run.h> -#include "glusterd-volgen.h" #include <glusterfs/syscall.h> #include <sys/signal.h> @@ -183,6 +182,9 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, { int ret = -1; int replica_nodes = 0; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); switch (volinfo->type) { case GF_CLUSTER_TYPE_NONE: @@ -191,8 +193,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, "replica count (%d) option given for non replicate " "volume %s", replica_count, volinfo->volname); - gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_REPLICA, "%s", - err_str); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT, + err_str, NULL); goto out; case GF_CLUSTER_TYPE_REPLICATE: @@ -203,8 +205,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, "than volume %s's replica count (%d)", replica_count, volinfo->volname, volinfo->replica_count); - gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, - "%s", err_str); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ARGUMENT, err_str, NULL); goto out; } if (replica_count == volinfo->replica_count) { @@ -218,8 +220,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, "(or %dxN)", brick_count, volinfo->dist_leaf_count, volinfo->dist_leaf_count); - gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, - GD_MSG_INVALID_ENTRY, "%s", err_str); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + 
GD_MSG_INVALID_ARGUMENT, err_str, NULL); goto out; } ret = 1; @@ -234,6 +236,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, "need %d(xN) bricks for reducing replica " "count of the volume from %d to %d", replica_nodes, volinfo->replica_count, replica_count); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ARGUMENT, err_str, NULL); goto out; } break; @@ -283,6 +287,7 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req) // failed to decode msg; req->rpc_err = GARBAGE_ARGS; snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } @@ -510,6 +515,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str, int i = 0; int ret = 0; int count = volinfo->replica_count - replica_count; + xlator_t *this = THIS; + GF_ASSERT(this); if (replica_count && subvols) { for (i = 0; i < volinfo->subvol_count; i++) { @@ -519,6 +526,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str, "Remove exactly %d" " brick(s) from each subvolume.", count); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL); break; } } @@ -532,6 +541,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str, ret = -1; snprintf(err_str, err_len, "Bricks not from same subvol for %s", vol_type); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL); break; } } while (++i < volinfo->subvol_count); @@ -556,6 +567,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo = NULL; glusterd_brickinfo_t *last = NULL; char *arbiter_array = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE) goto out; @@ -574,6 +588,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo, "Remove arbiter " "brick(s) only when converting from 
" "arbiter to replica 2 subvolume."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL); ret = -1; goto out; } @@ -598,6 +614,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo, "Removed bricks " "must contain arbiter when converting" " to plain distribute."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL); ret = -1; break; } @@ -621,6 +639,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req) char key[64] = ""; int keylen; int i = 1; + glusterd_conf_t *conf = NULL; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; glusterd_brickinfo_t **brickinfo_list = NULL; @@ -639,12 +658,15 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req) GF_ASSERT(req); this = THIS; GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; snprintf(err_str, sizeof(err_str), "Received garbage args"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } @@ -835,7 +857,17 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict); + if (conf->op_version < GD_OP_VERSION_8_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. 
remove-brick operation" + "falling back to syncop framework.", + GD_OP_VERSION_8_0); + ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK, + dict); + } out: if (ret) { @@ -991,6 +1023,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count, xlator_t *this = NULL; glusterd_conf_t *conf = NULL; gf_boolean_t is_valid_add_brick = _gf_false; + gf_boolean_t restart_shd = _gf_false; struct statvfs brickstat = { 0, }; @@ -1147,6 +1180,15 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count, if (glusterd_is_volume_replicate(volinfo)) { if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) { is_valid_add_brick = _gf_true; + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSG_GLUSTER_SERVICES_STOP_FAIL, + "Failed to stop shd for %s.", volinfo->volname); + } + restart_shd = _gf_true; + } ret = generate_dummy_client_volfiles(volinfo); if (ret) { gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, @@ -1221,6 +1263,14 @@ generate_volfiles: out: GF_FREE(free_ptr1); GF_FREE(free_ptr2); + if (restart_shd) { + if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, + PROC_START_NO_WAIT)) { + gf_msg("glusterd", GF_LOG_CRITICAL, 0, + GD_MSG_GLUSTER_SERVICE_START_FAIL, + "Failed to start shd for %s.", volinfo->volname); + } + } gf_msg_debug("glusterd", 0, "Returning %d", ret); return ret; @@ -1309,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { - 
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "Unable to find volume: %s", volname); goto out; } @@ -1328,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), &replica_count); if (ret) { - gf_msg_debug(THIS->name, 0, "Unable to get replica count"); - } - - ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), - &arbiter_count); - if (ret) { - gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict"); + gf_msg_debug(this->name, 0, "Unable to get replica count"); } if (replica_count > 0) { @@ -1348,18 +1392,20 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) } } - if (glusterd_is_volume_replicate(volinfo)) { + glusterd_add_peers_to_auth_list(volname); + + if (replica_count && glusterd_is_volume_replicate(volinfo)) { /* Do not allow add-brick for stopped volumes when replica-count * is being increased. */ - if (conf->op_version >= GD_OP_VERSION_3_7_10 && replica_count && - GLUSTERD_STATUS_STOPPED == volinfo->status) { + if (GLUSTERD_STATUS_STOPPED == volinfo->status && + conf->op_version >= GD_OP_VERSION_3_7_10) { ret = -1; snprintf(msg, sizeof(msg), " Volume must not be in" " stopped state when replica-count needs to " " be increased."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1367,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* op-version check for replica 2 to arbiter conversion. If we * don't have this check, an older peer added as arbiter brick * will not have the arbiter xlator in its volfile. 
*/ - if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) && - (replica_count == 3)) { - ret = -1; - snprintf(msg, sizeof(msg), - "Cluster op-version must " - "be >= 30800 to add arbiter brick to a " - "replica 2 volume."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", - msg); - *op_errstr = gf_strdup(msg); - goto out; + if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) { + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &arbiter_count); + if (ret) { + gf_msg_debug(this->name, 0, + "No arbiter count present in the dict"); + } else if (arbiter_count == 1) { + ret = -1; + snprintf(msg, sizeof(msg), + "Cluster op-version must " + "be >= 30800 to add arbiter brick to a " + "replica 2 volume."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } } /* Do not allow increasing replica count for arbiter volumes. */ - if (replica_count && volinfo->arbiter_count) { + if (volinfo->arbiter_count) { ret = -1; snprintf(msg, sizeof(msg), "Increasing replica count " "for arbiter volumes is not supported."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1394,6 +1446,43 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) is_force = dict_get_str_boolean(dict, "force", _gf_false); + /* Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. + * doing this check at the originator node is sufficient. + */ + + if (!is_force && is_origin_glusterd(dict)) { + ret = 0; + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + gf_msg_debug(this->name, 0, + "Replicate cluster type " + "found. 
Checking brick order."); + if (replica_count) + ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, + replica_count); + else + ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, + volinfo->replica_count); + } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + gf_msg_debug(this->name, 0, + "Disperse cluster type" + " found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname, + &bricks, &count, + volinfo->disperse_count); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, + "Not adding brick because of " + "bad brick order. %s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + if (volinfo->replica_count < replica_count && !is_force) { cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) { @@ -1410,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) if (len < 0) { strcpy(msg, "<error>"); } - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", msg); *op_errstr = gf_strdup(msg); goto out; @@ -1442,7 +1531,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) "Volume name %s rebalance is in " "progress. 
Please retry after completion", volname); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -1460,18 +1549,22 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) msg[0] = '\0'; } - ret = dict_get_int32n(dict, "count", SLEN("count"), &count); - if (ret) { - gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, - "Unable to get count"); - goto out; + if (!count) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } } - ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); - if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, - "Unable to get bricks"); - goto out; + if (!bricks) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks"); + goto out; + } } if (bricks) { @@ -1490,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) "brick path %s is " "too long", brick); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", msg); *op_errstr = gf_strdup(msg); @@ -1501,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true, NULL); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, "Add-brick: Unable" " to get brickinfo"); goto out; @@ -1571,7 +1664,7 @@ out: GF_FREE(str_ret); GF_FREE(all_bricks); - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; 
} @@ -1595,6 +1688,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, }; glusterd_conf_t *priv = THIS->private; int pid = -1; + xlator_t *this = THIS; + GF_ASSERT(this); /* Check whether all the nodes of the bricks to be removed are * up, if not fail the operation */ @@ -1603,6 +1698,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, ret = dict_get_strn(dict, key, keylen, &brick); if (ret) { snprintf(msg, sizeof(msg), "Unable to get %s", key); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "key=%s", key, NULL); *errstr = gf_strdup(msg); goto out; } @@ -1614,6 +1711,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "Incorrect brick " "%s for volume %s", brick, volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK, + "Brick=%s, Volume=%s", brick, volinfo->volname, NULL); *errstr = gf_strdup(msg); goto out; } @@ -1626,6 +1725,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "is not decommissioned. " "Use start or force option", brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM, + "Use 'start' or 'force' option, Brick=%s", brick, NULL); *errstr = gf_strdup(msg); ret = -1; goto out; @@ -1652,6 +1753,10 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "brick %s. 
Use force option to " "remove the offline brick", brick); + gf_smsg( + this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED, + "Use 'force' option to remove the offline brick, Brick=%s", + brick, NULL); *errstr = gf_strdup(msg); ret = -1; goto out; @@ -1662,6 +1767,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "Found dead " "brick %s", brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD, + "Brick=%s", brick, NULL); *errstr = gf_strdup(msg); ret = -1; goto out; @@ -1679,6 +1786,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "Host node of the " "brick %s is not in cluster", brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL); *errstr = gf_strdup(msg); ret = -1; goto out; @@ -1689,6 +1798,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, "Host node of the " "brick %s is down", brick); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN, + "Brick=%s", brick, NULL); *errstr = gf_strdup(msg); ret = -1; goto out; @@ -1768,6 +1879,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) errstr = gf_strdup( "Deleting all the bricks of the " "volume is not allowed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL); ret = -1; goto out; } @@ -1776,6 +1888,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) switch (cmd) { case GF_OP_CMD_NONE: errstr = gf_strdup("no remove-brick command issued"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD, + NULL); goto out; case GF_OP_CMD_STATUS: @@ -1900,6 +2014,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) errstr = gf_strdup( "use 'force' option as migration " "is in progress"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG, + "Use 'force' option", NULL); goto out; } @@ -1907,6 +2023,8 @@ glusterd_op_stage_remove_brick(dict_t *dict, char 
**op_errstr) errstr = gf_strdup( "use 'force' option as migration " "has failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL, + "Use 'force' option", NULL); goto out; } @@ -1917,6 +2035,11 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) "use 'force' option as migration " "of some files might have been skipped or " "has failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + GD_MSG_MIGRATION_FAIL, + "Use 'force' option, some files might have been " + "skipped", + NULL); goto out; } } @@ -2111,6 +2234,119 @@ out: } int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int +glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; + int32_t cmd = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(rsp_dict); + GF_ASSERT(req_dict); + + ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to allocate memory"); + 
goto out; + } + + ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get command"); + goto out; + } + + /* remove brick task id is generted in glusterd_op_stage_remove_brick(), + * but rsp_dict is unavailable there. So copying it to rsp_dict from + * req_dict here. */ + + if (is_origin_glusterd(rsp_dict)) { + ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick"); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING, + "%s", msg); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_REMOVE_BRICK_ID_SET_FAIL, + "Failed to set remove-brick-id"); + goto out; + } + } + } + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) && + GD_OP_REMOVE_BRICK == volinfo->rebal.op) { + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict, + GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set task-id for volume %s", volname); + goto out; + } + } +out: + return ret; +} +int glusterd_op_remove_brick(dict_t *dict, char **op_errstr) { int ret = -1; @@ -2424,7 +2660,7 @@ out: GF_FREE(brick_tmpstr); if (bricks_dict) dict_unref(bricks_dict); - + gf_msg_debug(this->name, 0, "returning %d ", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c index 09f0a35dc45..5c01f0c70b6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c @@ -26,12 +26,17 @@ 
glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, xlator_t *this = THIS; glusterd_svc_t *svc = NULL; - if (!this) + if (!this) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED, + NULL); goto out; + } options = dict_new(); - if (!options) + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } svc = glusterd_conn_get_svc_object(conn); if (!svc) { @@ -46,8 +51,11 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, ret = dict_set_int32n(options, "transport.socket.ignore-enoent", SLEN("transport.socket.ignore-enoent"), 1); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=transport.socket.ignore-enoent", NULL); goto out; + } /* @options is free'd by rpc_transport when destroyed */ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16); @@ -61,9 +69,10 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, goto out; ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; - else + } else ret = 0; conn->frame_timeout = frame_timeout; diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c index cf567fa4172..f08bd6cebee 100644 --- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c +++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c @@ -421,6 +421,35 @@ check_host_list(void) } int +gd_ganesha_send_dbus(char *volname, char *value) +{ + runner_t runner = { + 0, + }; + int ret = -1; + runinit(&runner); + + GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out); + GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out); + + ret = 0; + if (check_host_list()) { + /* Check whether ganesha is running on this node */ + if (manage_service("status")) { + gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0, + 
GD_MSG_GANESHA_NOT_RUNNING, + "Export failed, NFS-Ganesha is not running"); + } else { + runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, + value, volname, NULL); + ret = runner_run(&runner); + } + } +out: + return ret; +} + +int manage_export_config(char *volname, char *value, char **op_errstr) { runner_t runner = { @@ -447,9 +476,6 @@ int ganesha_manage_export(dict_t *dict, char *value, gf_boolean_t update_cache_invalidation, char **op_errstr) { - runner_t runner = { - 0, - }; int ret = -1; glusterd_volinfo_t *volinfo = NULL; dict_t *vol_opts = NULL; @@ -458,7 +484,6 @@ ganesha_manage_export(dict_t *dict, char *value, glusterd_conf_t *priv = NULL; gf_boolean_t option = _gf_false; - runinit(&runner); this = THIS; GF_ASSERT(this); priv = this->private; @@ -538,26 +563,13 @@ ganesha_manage_export(dict_t *dict, char *value, goto out; } } - - if (check_host_list()) { - /* Check whether ganesha is running on this node */ - if (manage_service("status")) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING, - "Export failed, NFS-Ganesha is not running"); - } else { - runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, - value, volname, NULL); - ret = runner_run(&runner); - if (ret) { - gf_asprintf(op_errstr, - "Dynamic export" - " addition/deletion failed." - " Please see log file for details"); - goto out; - } - } + ret = gd_ganesha_send_dbus(volname, value); + if (ret) { + gf_asprintf(op_errstr, + "Dynamic export addition/deletion failed." 
+ " Please see log file for details"); + goto out; } - if (update_cache_invalidation) { vol_opts = volinfo->dict; ret = dict_set_dynstr_with_alloc(vol_opts, @@ -617,8 +629,9 @@ tear_down_cluster(gf_boolean_t run_teardown) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name); ret = sys_lstat(path, &st); if (ret == -1) { @@ -649,7 +662,6 @@ tear_down_cluster(gf_boolean_t run_teardown) gf_msg_debug(THIS->name, 0, "%s %s", ret ? "Failed to remove" : "Removed", entry->d_name); - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } ret = sys_closedir(dir); diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 76b7684538f..bf062c87060 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -115,13 +115,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req) ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } if (cli_req.dict.dict_len) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); @@ -142,13 +147,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req) snprintf(err_str, sizeof(err_str), "Failed to get " "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); ret = -1; goto out; } ret = dict_set_dynstr(dict, "host-uuid", host_uuid); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=host-uuid", NULL); goto out; + } } ret 
= glusterd_op_begin_synctask(req, cli_op, dict); @@ -188,13 +198,18 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req) ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } if (cli_req.dict.dict_len) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); @@ -215,6 +230,8 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req) snprintf(err_str, sizeof(err_str), "Failed to get " "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); ret = -1; goto out; } @@ -267,13 +284,18 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req) ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } if (cli_req.dict.dict_len) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); @@ -294,6 +316,8 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req) snprintf(err_str, sizeof(err_str), "Failed to get " "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); ret = -1; goto out; } @@ -2251,6 +2275,9 @@ glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave, "Volume %s needs to be started " "before " GEOREP " start", volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED, + "Volume is not in a started state, 
Volname=%s", + volinfo->volname, NULL); goto out; } @@ -2555,6 +2582,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s", priv->workdir, filename); if ((len < 0) || (len >= sizeof(abs_filename))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -2567,6 +2595,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) if (len < 0) { strcpy(errmsg, "<error>"); } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL, + "Realpath=%s, Reason=%s", priv->workdir, strerror(errno), + NULL); *op_errstr = gf_strdup(errmsg); ret = -1; goto out; @@ -2577,6 +2608,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) "Failed to get " "realpath of %s: %s", filename, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL, + "Filename=%s, Reason=%s", filename, strerror(errno), NULL); *op_errstr = gf_strdup(errmsg); ret = -1; goto out; @@ -2586,6 +2619,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) will succeed for /var/lib/glusterd_bad */ len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir); if ((len < 0) || (len >= sizeof(workdir))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -2599,6 +2633,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) if (len < 0) { strcpy(errmsg, "<error>"); } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); *op_errstr = gf_strdup(errmsg); ret = -1; goto out; @@ -2613,6 +2649,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) if (len < 0) { strcpy(errmsg, "<error>"); } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); *op_errstr = gf_strdup(errmsg); goto out; } @@ -2621,9 +2659,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) snprintf(errmsg, sizeof(errmsg), "Source file" " is not a regular file."); + 
gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); *op_errstr = gf_strdup(errmsg); - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, "%s", - errmsg); ret = -1; goto out; } @@ -2842,8 +2880,11 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, */ if (strstr(slave_url, "@")) { slave_url_buf = gf_strdup(slave_url); - if (!slave_url_buf) + if (!slave_url_buf) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "Slave_url=%s", slave_url, NULL); goto out; + } slave_user = strtok_r(slave_url_buf, "@", &save_ptr); slave_ip = strtok_r(NULL, "@", &save_ptr); diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c index b01fd4da24b..a0bfea41f0f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c @@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags) } runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s", svc->proc.logdir, svc->proc.logfile); if ((len < 0) || (len >= PATH_MAX)) { @@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags) goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 6e6593e7263..1b21c40596d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -140,6 +140,7 @@ 
glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, ctx->req = req; if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -147,9 +148,11 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len, &dict); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; - else + } else dict->extra_stdfree = friend_req->vols.vols_val; ctx->vols = dict; @@ -386,82 +389,129 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, keylen = snprintf(key, sizeof(key), "volume%d.name", count); ret = dict_set_strn(volumes, key, keylen, volinfo->volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.type", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.status", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->status); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, 
sizeof(key), "volume%d.stripe_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.transport", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); - if (!volume_id_str) + if (!volume_id_str) { + gf_smsg(this->name, 
GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count); ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count); ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) { @@ -474,23 +524,33 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname, brickinfo->path); if ((len < 0) || (len >= sizeof(brick))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } buf = gf_strdup(brick); keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i); ret = dict_set_dynstrn(volumes, key, keylen, buf); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i); snprintf(brick_uuid, sizeof(brick_uuid), "%s", uuid_utoa(brickinfo->uuid)); buf = gf_strdup(brick_uuid); - if (!buf) + if (!buf) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "brick_uuid=%s", brick_uuid, NULL); goto out; + } ret = dict_set_dynstrn(volumes, key, keylen, buf); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto 
out; + } i++; } @@ -500,6 +560,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s", ta_brickinfo->hostname, ta_brickinfo->path); if ((len < 0) || (len >= sizeof(ta_brick))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -507,16 +568,23 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", count); ret = dict_set_dynstrn(volumes, key, keylen, buf); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL); goto out; + } dict = volinfo->dict; if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = 0; goto out; } @@ -812,11 +880,14 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid, gf_msg_debug(this->name, 0, "Received op from uuid %s", str); dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type); if (!req_ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; } @@ -824,8 +895,8 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid, req_ctx->op = op; ret = dict_unserialize(buf_val, buf_len, &dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } @@ -1601,6 +1672,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req) if (cli_req.dict.dict_len) { dict = dict_new(); if (!dict) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); ret = -1; goto out; } @@ -1623,6 +1696,7 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req) rsp_dict = dict_new(); if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -1639,9 +1713,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req) ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize " - "dictionary."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } ret = 0; @@ -1694,8 +1767,10 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req) GF_ASSERT(priv); dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) { @@ -1707,8 +1782,11 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req) } ret = dict_set_int32n(dict, "count", SLEN("count"), count); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; + } ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val, &rsp.dict.dict_len); @@ -1790,6 +1868,8 @@ __glusterd_handle_ganesha_cmd(rpcsvc_request_t *req) /* Unserialize the dictionary */ dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); ret = -1; goto out; } @@ -2158,9 +2238,8 @@ glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr, ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val, &rsp.fsm_log.fsm_log_len); if (ret < 0) { - gf_msg("glusterd", GF_LOG_ERROR, 0, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, + 
GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); return ret; } } @@ -2206,6 +2285,7 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req) dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -2432,8 +2512,8 @@ glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); return ret; } @@ -2472,9 +2552,8 @@ glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } } @@ -2715,12 +2794,18 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req) } ret = dict_get_int32n(dict, "count", SLEN("count"), &count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); goto out; + } ret = dict_get_int32n(dict, "op", SLEN("op"), &op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=op", NULL); goto out; + } if (GD_FRIEND_UPDATE_DEL == op) { (void)glusterd_handle_friend_update_delete(dict); @@ -2979,8 +3064,11 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req) if (cli_req.dict.dict_len > 0) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); } @@ -3207,6 
+3295,7 @@ __glusterd_handle_umount(rpcsvc_request_t *req) /* check if it is allowed to umount path */ path = gf_strdup(umnt_req.path); if (!path) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL); rsp.op_errno = ENOMEM; goto out; } @@ -3414,12 +3503,16 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, char *af = NULL; peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t); - if (!peerctx) + if (!peerctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } options = dict_new(); - if (!options) + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } if (args) peerctx->args = *args; @@ -3513,6 +3606,7 @@ glusterd_friend_add(const char *hoststr, int port, *friend = glusterd_peerinfo_new(state, uuid, hoststr, port); if (*friend == NULL) { ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL); goto out; } @@ -4090,13 +4184,15 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags) }; int keylen; - priv = THIS->private; + xlator_t *this = THIS; + GF_ASSERT(this); + + priv = this->private; GF_ASSERT(priv); friends = dict_new(); if (!friends) { - gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, - "Out of Memory"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; } @@ -4122,24 +4218,36 @@ unlock: keylen = snprintf(key, sizeof(key), "friend%d.uuid", count); uuid_utoa_r(MY_UUID, my_uuid_str); ret = dict_set_strn(friends, key, keylen, my_uuid_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.hostname", count); ret = dict_set_nstrn(friends, key, keylen, "localhost", SLEN("localhost")); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen 
= snprintf(key, sizeof(key), "friend%d.connected", count); ret = dict_set_int32n(friends, key, keylen, 1); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } ret = dict_set_int32n(friends, "count", SLEN("count"), count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; + } ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val, &rsp.friends.friends_len); @@ -4311,8 +4419,11 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req) if (cli_req.dict.dict_len > 0) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); if (ret < 0) { @@ -4580,6 +4691,7 @@ __glusterd_handle_barrier(rpcsvc_request_t *req) dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -5114,12 +5226,17 @@ glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo) 0, }; + xlator_t *this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); GF_VALIDATE_OR_GOTO(THIS->name, fp, out); gsync_rsp_dict = dict_new(); - if (!gsync_rsp_dict) + if (!gsync_rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = gethostname(my_hostname, sizeof(my_hostname)); if (ret) { @@ -5146,7 +5263,7 @@ glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo, glusterd_volinfo_t *tmp_vol = NULL; glusterd_snap_t *snapinfo = NULL; int snapcount = 0; - char timestr[64] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; char snap_status_str[STATUS_STRLEN] = { @@ -5264,16 +5381,25 @@ glusterd_print_client_details(FILE *fp, dict_t *dict, ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), brickinfo->path); - if (ret) + if (ret) { 
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=brick-name", NULL); goto out; + } ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cmd", NULL); goto out; + } ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=volname", NULL); goto out; + } ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, &brick_req->input.input_len); @@ -5467,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str); if (ret) { - now = time(NULL); + now = gf_time(); strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S", localtime(&now)); gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp); @@ -5911,14 +6037,27 @@ get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo) uuid_t volid = {0}; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + brickid_dup = gf_strdup(brickid); - if (!brickid_dup) + if (!brickid_dup) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "brick_id=%s", brickid, NULL); goto out; + } volid_str = brickid_dup; brick = strchr(brickid_dup, ':'); - if (!volid_str || !brick) + if (!volid_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); goto out; + } + + if (!brick) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + goto out; + } *brick = '\0'; brick++; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 7cb70fcb4e2..d96e35503dd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -111,6 +111,8 @@ get_snap_volname_and_volinfo(const char *volpath, char **volname, volfile_token = strtok_r(NULL, "/", 
&save_ptr); *volname = gf_strdup(volfile_token); if (NULL == *volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volname=%s", volfile_token, NULL); ret = -1; goto out; } @@ -236,6 +238,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -256,6 +259,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -271,6 +275,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -292,6 +297,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -312,6 +318,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -366,6 +373,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -386,6 +394,7 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (volid_ptr) { volid_ptr = strchr(volid_ptr, '/'); if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } @@ -402,6 +411,8 @@ build_volfile_path(char *volume_id, char 
*path, size_t path_len, /* Split the volume name */ vol = strtok_r(dup_volname, ".", &save_ptr); if (!vol) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL, + "Volume name=%s", dup_volname, NULL); ret = -1; goto out; } @@ -446,18 +457,25 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, if (ret) { dup_volname = gf_strdup(volid_ptr); if (!dup_volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volume name=%s", volid_ptr, NULL); ret = -1; goto out; } /* Split the volume name */ vol = strtok_r(dup_volname, ".", &save_ptr); if (!vol) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL, + "Volume name=%s", dup_volname, NULL); ret = -1; goto out; } ret = glusterd_volinfo_find(vol, &volinfo); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + NULL); goto out; + } } gotvolinfo: @@ -466,8 +484,10 @@ gotvolinfo: ret = snprintf(path, path_len, "%s/%s/%s.vol", path_prefix, volinfo->volname, volid_ptr); - if (ret == -1) + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = sys_stat(path, &stbuf); @@ -522,12 +542,14 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo, GF_ASSERT(peerinfo); if (!args->xdata.xdata_len) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = 0; goto out; } dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -561,6 +583,8 @@ glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo, } *brick_name = gf_strdup(name); if (*brick_name == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Brick_name=%s", name, NULL); ret = -1; goto out; } @@ -976,6 +1000,7 @@ __server_getspec(rpcsvc_request_t *req) dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -ENOMEM; goto 
fail; } @@ -1037,10 +1062,11 @@ __server_getspec(rpcsvc_request_t *req) } RCU_READ_UNLOCK; if (peer_cnt) { - ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts); - if (ret) { + op_ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts); + if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "failed to set peer_host in dict"); + ret = op_ret; goto fail; } } @@ -1050,9 +1076,8 @@ __server_getspec(rpcsvc_request_t *req) ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val, &rsp.xdata.xdata_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict to request buffer"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto fail; } } @@ -1073,6 +1098,7 @@ __server_getspec(rpcsvc_request_t *req) } ret = file_len = stbuf.st_size; } else { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); op_errno = ENOENT; goto fail; } @@ -1080,6 +1106,7 @@ __server_getspec(rpcsvc_request_t *req) if (file_len) { rsp.spec = CALLOC(file_len + 1, sizeof(char)); if (!rsp.spec) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; op_errno = ENOMEM; goto fail; @@ -1158,13 +1185,17 @@ __server_event_notify(rpcsvc_request_t *req) (xdrproc_t)xdr_gf_event_notify_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto fail; } if (args.dict.dict_len) { dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); return ret; + } ret = dict_unserialize(args.dict.dict_val, args.dict.dict_len, &dict); if (ret) { gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, @@ -1357,6 +1388,7 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req) if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_GARBAGE_ARGS, NULL); goto out; } @@ -1370,8 +1402,10 @@ __glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req) } dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_set_int32(dict, GD_OP_VERSION_KEY, conf->op_version); if (ret) { @@ -1457,6 +1491,7 @@ __glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req) if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } @@ -1529,22 +1564,25 @@ __server_get_volume_info(rpcsvc_request_t *req) char *volume_id_str = NULL; int32_t flags = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + ret = xdr_to_generic(req->msg[0], &vol_info_req, (xdrproc_t)xdr_gf_get_volume_info_req); if (ret < 0) { /* failed to decode msg */ req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } - gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, - "Received get volume info req"); + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, NULL); if (vol_info_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new(); if (!dict) { - gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY, - "Out of Memory"); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); op_errno = ENOMEM; ret = -1; goto out; @@ -1553,9 +1591,8 @@ __server_get_volume_info(rpcsvc_request_t *req) ret = dict_unserialize(vol_info_req.dict.dict_val, vol_info_req.dict.dict_len, &dict); if (ret < 0) { - gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to " - "unserialize req-buffer to dictionary"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); op_errno = -ret; ret = -1; goto out; @@ -1566,8 +1603,8 @@ __server_get_volume_info(rpcsvc_request_t *req) ret = dict_get_int32(dict, "flags", &flags); if (ret) { - 
gf_msg(THIS->name, GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED, - "failed to get flags"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=flags", NULL); op_errno = -ret; ret = -1; goto out; @@ -1575,13 +1612,15 @@ __server_get_volume_info(rpcsvc_request_t *req) if (!flags) { /* Nothing to query about. Just return success */ - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, "No flags set"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, NULL); ret = 0; goto out; } ret = dict_get_str(dict, "volname", &volname); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); op_errno = EINVAL; ret = -1; goto out; @@ -1589,6 +1628,8 @@ __server_get_volume_info(rpcsvc_request_t *req) ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volname=%s", volname, NULL); op_errno = EINVAL; ret = -1; goto out; @@ -1597,6 +1638,8 @@ __server_get_volume_info(rpcsvc_request_t *req) if (flags & (int32_t)GF_GET_VOLUME_UUID) { volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); if (!volume_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + NULL); op_errno = ENOMEM; ret = -1; goto out; @@ -1604,8 +1647,8 @@ __server_get_volume_info(rpcsvc_request_t *req) dict_rsp = dict_new(); if (!dict_rsp) { - gf_msg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY, - "Out of Memory"); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); op_errno = ENOMEM; GF_FREE(volume_id_str); ret = -1; @@ -1613,6 +1656,8 @@ __server_get_volume_info(rpcsvc_request_t *req) } ret = dict_set_dynstr(dict_rsp, "volume_id", volume_id_str); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=volume_id", NULL); op_errno = -ret; ret = -1; goto out; @@ -1621,6 +1666,8 @@ __server_get_volume_info(rpcsvc_request_t *req) ret = dict_allocate_and_serialize(dict_rsp, 
&vol_info_rsp.dict.dict_val, &vol_info_rsp.dict.dict_len); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); op_errno = -ret; ret = -1; goto out; @@ -1686,6 +1733,8 @@ __server_get_snap_info(rpcsvc_request_t *req) if (snap_info_req.dict.dict_len) { dict = dict_new(); if (!dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); op_errno = ENOMEM; ret = -1; goto out; @@ -1716,6 +1765,8 @@ __server_get_snap_info(rpcsvc_request_t *req) dict_rsp = dict_new(); if (!dict_rsp) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); op_errno = ENOMEM; ret = -1; goto out; @@ -1908,22 +1959,45 @@ gd_validate_peer_op_version(xlator_t *this, glusterd_peerinfo_t *peerinfo, int32_t peer_min_op_version = 0; int32_t peer_max_op_version = 0; - if (!dict || !this || !peerinfo) + if (!dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } + + if (!this) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED, + NULL); + goto out; + } + + if (!peerinfo) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } conf = this->private; ret = dict_get_int32(dict, GD_OP_VERSION_KEY, &peer_op_version); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_OP_VERSION_KEY, NULL); goto out; + } ret = dict_get_int32(dict, GD_MAX_OP_VERSION_KEY, &peer_max_op_version); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_MAX_OP_VERSION_KEY, NULL); goto out; + } ret = dict_get_int32(dict, GD_MIN_OP_VERSION_KEY, &peer_min_op_version); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_MIN_OP_VERSION_KEY, NULL); goto out; + } ret = -1; /* Check if peer can support our op_version */ @@ -2189,14 +2263,20 @@ 
glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx) int ret = -1; frame = create_frame(this, this->ctx->pool); - if (!frame) + if (!frame) { + gf_smsg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto out; + } frame->local = peerctx; req_dict = dict_new(); - if (!req_dict) + if (!req_dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = dict_set_dynstr(req_dict, GD_PEER_ID_KEY, gf_strdup(uuid_utoa(MY_UUID))); @@ -2463,12 +2543,17 @@ glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc, int ret = -1; frame = create_frame(this, this->ctx->pool); - if (!frame) + if (!frame) { + gf_smsg(this->name, GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto out; + } frame->local = peerctx; - if (!peerctx) + if (!peerctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } RCU_READ_LOCK; diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c index 511a102d016..61c0f1c946f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-hooks.c +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -87,21 +87,24 @@ glusterd_hooks_create_hooks_directory(char *basedir) glusterd_conf_t *priv = NULL; int32_t len = 0; - priv = THIS->private; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + priv = this->private; snprintf(path, sizeof(path), "%s/hooks", basedir); ret = mkdir_p(path, 0755, _gf_true); if (ret) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, - "Unable to create %s", path); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Path=%s", path, NULL); goto out; } GLUSTERD_GET_HOOKS_DIR(version_dir, GLUSTERD_HOOK_VER, priv); ret = mkdir_p(version_dir, 0755, _gf_true); if (ret) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, - "Unable to create %s", version_dir); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, 
GD_MSG_CREATE_DIR_FAILED, + "Directory=%s", version_dir, NULL); goto out; } @@ -112,13 +115,14 @@ glusterd_hooks_create_hooks_directory(char *basedir) len = snprintf(path, sizeof(path), "%s/%s", version_dir, cmd_subdir); if ((len < 0) || (len >= sizeof(path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } ret = mkdir_p(path, 0755, _gf_true); if (ret) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, - "Unable to create %s", path); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL); goto out; } @@ -126,13 +130,15 @@ glusterd_hooks_create_hooks_directory(char *basedir) len = snprintf(path, sizeof(path), "%s/%s/%s", version_dir, cmd_subdir, type_subdir[type]); if ((len < 0) || (len >= sizeof(path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); ret = -1; goto out; } ret = mkdir_p(path, 0755, _gf_true); if (ret) { - gf_msg(THIS->name, GF_LOG_CRITICAL, errno, - GD_MSG_CREATE_DIR_FAILED, "Unable to create %s", path); + gf_smsg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL); goto out; } } @@ -200,20 +206,31 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) int i = 0; int count = 0; int ret = -1; + int flag = 0; char query[1024] = { 0, }; char *key = NULL; char *value = NULL; + char *inet_family = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); ret = dict_get_int32(dict, "count", &count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); goto out; + } /* This will not happen unless op_ctx * is corrupted*/ - if (!count) + if (!count) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, "count", + NULL); goto out; + } runner_add_arg(runner, "-o"); for (i = 1; ret == 0; i++) { @@ -228,9 +245,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) continue; 
runner_argprintf(runner, "%s=%s", key, value); + if ((strncmp(key, "cluster.enable-shared-storage", + SLEN("cluster.enable-shared-storage")) == 0 || + strncmp(key, "enable-shared-storage", + SLEN("enable-shared-storage")) == 0) && + strncmp(value, "enable", SLEN("enable")) == 0) + flag = 1; } glusterd_hooks_add_custom_args(dict, runner); + if (flag == 1) { + ret = dict_get_str_sizen(this->options, "transport.address-family", + &inet_family); + if (!ret) { + runner_argprintf(runner, "transport.address-family=%s", + inet_family); + } + } ret = 0; out: @@ -357,27 +388,31 @@ glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx, lines = GF_CALLOC(1, N * sizeof(*lines), gf_gld_mt_charptr); if (!lines) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } ret = -1; line_count = 0; - GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch); - while (entry) { + + while ((entry = sys_readdir(hookdir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; if (line_count == N - 1) { N *= 2; lines = GF_REALLOC(lines, N * sizeof(char *)); - if (!lines) + if (!lines) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } } if (glusterd_is_hook_enabled(entry->d_name)) { lines[line_count] = gf_strdup(entry->d_name); line_count++; } - - GF_SKIP_IRRELEVANT_ENTRIES(entry, hookdir, scratch); } lines[line_count] = NULL; @@ -461,31 +496,40 @@ glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir, int ret = -1; glusterd_hooks_stub_t *hooks_stub = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); GF_ASSERT(stub); if (!stub) goto out; hooks_stub = GF_CALLOC(1, sizeof(*hooks_stub), gf_gld_mt_hooks_stub_t); - if (!hooks_stub) + if (!hooks_stub) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } CDS_INIT_LIST_HEAD(&hooks_stub->all_hooks); hooks_stub->op = op; hooks_stub->scriptdir = gf_strdup(scriptdir); - if (!hooks_stub->scriptdir) + if 
(!hooks_stub->scriptdir) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "scriptdir=%s", scriptdir, NULL); goto out; + } hooks_stub->op_ctx = dict_copy_with_ref(op_ctx, hooks_stub->op_ctx); - if (!hooks_stub->op_ctx) + if (!hooks_stub->op_ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_COPY_FAIL, NULL); goto out; + } *stub = hooks_stub; ret = 0; out: if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL, - "Failed to initialize " - "post hooks stub"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL, + NULL); glusterd_hooks_stub_cleanup(hooks_stub); } @@ -547,12 +591,20 @@ glusterd_hooks_priv_init(glusterd_hooks_private_t **new) int ret = -1; glusterd_hooks_private_t *hooks_priv = NULL; - if (!new) + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + if (!new) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } hooks_priv = GF_CALLOC(1, sizeof(*hooks_priv), gf_gld_mt_hooks_priv_t); - if (!hooks_priv) + if (!hooks_priv) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } pthread_mutex_init(&hooks_priv->mutex, NULL); pthread_cond_init(&hooks_priv->cond, NULL); diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c index a48923e26e1..34abf35cb00 100644 --- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c @@ -43,6 +43,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req) if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } @@ -75,7 +76,7 @@ __glusterd_handle_log_rotate(rpcsvc_request_t *req) "for volume %s", volname); - ret = dict_set_uint64(dict, "rotate-key", (uint64_t)time(NULL)); + ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time()); if (ret) goto out; @@ -138,6 +139,8 @@ 
glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr) /* If no brick is specified, do log-rotate for all the bricks in the volume */ if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); ret = 0; goto out; } @@ -204,8 +207,11 @@ glusterd_op_log_rotate(dict_t *dict) ret = dict_get_str(dict, "brick", &brick); /* If no brick is specified, do log-rotate for all the bricks in the volume */ - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); goto cont; + } ret = glusterd_brickinfo_new_from_brick(brick, &tmpbrkinfo, _gf_false, NULL); diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 17052cee263..d7257e1a7b5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -27,6 +27,7 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_mop_stage_req_t, gf_gld_mt_probe_ctx_t, gf_gld_mt_glusterd_volinfo_t, + gf_gld_mt_volinfo_dict_data_t, gf_gld_mt_glusterd_brickinfo_t, gf_gld_mt_peer_hostname_t, gf_gld_mt_defrag_info, diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 435a43df85d..3a1e600fb03 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -46,7 +46,7 @@ GLFS_MSGID( GD_MSG_SNAP_STATUS_FAIL, GD_MSG_SNAP_INIT_FAIL, GD_MSG_VOLINFO_SET_FAIL, GD_MSG_VOLINFO_GET_FAIL, GD_MSG_BRICK_CREATION_FAIL, GD_MSG_BRICK_GET_INFO_FAIL, GD_MSG_BRICK_NEW_INFO_FAIL, GD_MSG_LVS_FAIL, - GD_MSG_SETXATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED, + GD_MSG_SET_XATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED, GD_MSG_SNAP_NOT_FOUND, GD_MSG_FS_LABEL_UPDATE_FAIL, GD_MSG_LVM_MOUNT_FAILED, GD_MSG_DICT_SET_FAILED, GD_MSG_CANONICALIZE_FAIL, GD_MSG_DICT_GET_FAILED, GD_MSG_SNAP_INFO_FAIL, GD_MSG_SNAP_VOL_CONFIG_FAIL, @@ -78,7 +78,7 @@ 
GLFS_MSGID( GD_MSG_COMMIT_OP_FAIL, GD_MSG_PEER_LIST_CREATE_FAIL, GD_MSG_BRICK_OP_FAIL, GD_MSG_OPINFO_SET_FAIL, GD_MSG_OP_EVENT_UNLOCK_FAIL, GD_MSG_MGMTV3_OP_RESP_FAIL, GD_MSG_PEER_NOT_FOUND, GD_MSG_REQ_DECODE_FAIL, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED, GD_MSG_PRE_VALD_RESP_FAIL, GD_MSG_SVC_GET_FAIL, GD_MSG_VOLFILE_NOT_FOUND, GD_MSG_OP_EVENT_LOCK_FAIL, GD_MSG_NON_STRIPE_VOL, GD_MSG_SNAPD_OBJ_GET_FAIL, GD_MSG_QUOTA_DISABLED, GD_MSG_CACHE_MINMAX_SIZE_INVALID, @@ -116,7 +116,7 @@ GLFS_MSGID( GD_MSG_PARSE_BRICKINFO_FAIL, GD_MSG_VERS_STORE_FAIL, GD_MSG_HEADER_ADD_FAIL, GD_MSG_QUOTA_CONF_WRITE_FAIL, GD_MSG_QUOTA_CONF_CORRUPT, GD_MSG_FORK_FAIL, GD_MSG_CKSUM_COMPUTE_FAIL, GD_MSG_VERS_CKSUM_STORE_FAIL, - GD_MSG_GETXATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE, + GD_MSG_GET_XATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE, GD_MSG_VOL_STOPPED, GD_MSG_OPCTX_GET_FAIL, GD_MSG_TASKID_GEN_FAIL, GD_MSG_REBALANCE_ID_MISSING, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, GD_MSG_UUID_GEN_STORE_FAIL, @@ -302,6 +302,150 @@ GLFS_MSGID( GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL, GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, GD_MSG_CLUSTER_RC_ENABLE, - GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN); + GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, GD_MSG_REMOVE_ARBITER_BRICK, + GD_MSG_BRICK_NOT_DECOM, GD_MSG_BRICK_STOPPED, GD_MSG_BRICK_DEAD, + GD_MSG_BRICK_HOST_NOT_FOUND, GD_MSG_BRICK_HOST_DOWN, GD_MSG_BRICK_DELETE, + GD_MSG_BRICK_NO_REMOVE_CMD, GD_MSG_MIGRATION_PROG, GD_MSG_MIGRATION_FAIL, + GD_MSG_COPY_FAIL, GD_MSG_REALPATH_GET_FAIL, + GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, GD_MSG_STRCHR_FAIL, GD_MSG_SPLIT_FAIL, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, GD_MSG_VOL_SHD_NOT_COMP, + 
GD_MSG_BITROT_NOT_ENABLED, GD_MSG_CREATE_BRICK_DIR_FAILED, + GD_MSG_CREATE_GLUSTER_DIR_FAILED, GD_MSG_BRICK_CREATE_MNTPNT, + GD_MSG_BRICK_CREATE_ROOT, GD_MSG_SET_XATTR_BRICK_FAIL, + GD_MSG_REMOVE_XATTR_FAIL, GD_MSG_XLATOR_NOT_DEFINED, + GD_MSG_BRICK_NOT_RUNNING, GD_MSG_INCORRECT_BRICK, GD_MSG_UUID_GET_FAIL, + GD_MSG_INVALID_ARGUMENT, GD_MSG_FRAME_CREATE_FAIL, + GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED, + GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED, + GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL, + GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL, + GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL); + +#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry" +#define GD_MSG_INVALID_ARGUMENT_STR \ + "Invalid arguments have been given to function" +#define GD_MSG_GARBAGE_ARGS_STR "Garbage args received" +#define GD_MSG_BRICK_SUBVOL_VERIFY_FAIL_STR "Brick's subvol verification fail" +#define GD_MSG_REMOVE_ARBITER_BRICK_STR "Failed to remove arbiter bricks" +#define GD_MSG_DICT_GET_FAILED_STR "Dict get failed" +#define GD_MSG_DICT_SET_FAILED_STR "Dict set failed" +#define GD_MSG_BRICK_NOT_FOUND_STR "Brick not found in volume" +#define GD_MSG_BRICK_NOT_DECOM_STR "Brick is not decommissoned" +#define GD_MSG_BRICK_STOPPED_STR "Found stopped brick" +#define GD_MSG_BRICK_DEAD_STR "Found dead brick" +#define GD_MSG_BRICK_HOST_NOT_FOUND_STR \ + "Host node of the brick is not a part of cluster" +#define GD_MSG_BRICK_HOST_DOWN_STR "Host node of the brick is down" +#define GD_MSG_BRICK_DELETE_STR \ + "Deleting all the bricks of the volume is not allowed" +#define GD_MSG_BRICK_NO_REMOVE_CMD_STR "No remove-brick command issued" +#define GD_MSG_INCORRECT_BRICK_STR "Incorrect brick for volume" +#define GD_MSG_MIGRATION_PROG_STR "Migration is in progress" +#define GD_MSG_MIGRATION_FAIL_STR "Migration has failed" +#define GD_MSG_XLATOR_NOT_DEFINED_STR "Xlator not defined" +#define 
GD_MSG_DICT_CREATE_FAIL_STR "Failed to create dictionary" +#define GD_MSG_COPY_FAIL_STR "Failed to copy" +#define GD_MSG_UUID_GET_FAIL_STR "Failed to get the uuid of local glusterd" +#define GD_MSG_GEO_REP_START_FAILED_STR "Georep start failed for volume" +#define GD_MSG_REALPATH_GET_FAIL_STR "Failed to get realpath" +#define GD_MSG_FILE_NOT_FOUND_STR "File not found in directory" +#define GD_MSG_SRC_FILE_ERROR_STR "Error in source file" +#define GD_MSG_DICT_UNSERIALIZE_FAIL_STR "Failed to unserialize dict" +#define GD_MSG_VOL_ID_SET_FAIL_STR "Failed to set volume id" +#define GD_MSG_ARBITER_BRICK_SET_INFO_FAIL_STR \ + "Failed to add arbiter info to brick" +#define GD_MSG_NO_MEMORY_STR "Out of memory" +#define GD_MSG_GLUSTERD_UMOUNT_FAIL_STR "Failed to unmount path" +#define GD_MSG_PEER_ADD_FAIL_STR "Failed to add new peer" +#define GD_MSG_BRICK_GET_INFO_FAIL_STR "Failed to get brick info" +#define GD_MSG_STRCHR_FAIL_STR "Failed to get the character" +#define GD_MSG_SPLIT_FAIL_STR "Failed to split" +#define GD_MSG_VOLINFO_GET_FAIL_STR "Failed to get volinfo" +#define GD_MSG_PEER_NOT_FOUND_STR "Failed to find peer info" +#define GD_MSG_DICT_COPY_FAIL_STR "Failed to copy values from dictionary" +#define GD_MSG_ALLOC_AND_COPY_UUID_FAIL_STR \ + "Failed to allocate memory or copy uuid" +#define GD_MSG_VOL_NOT_FOUND_STR "Volume not found" +#define GD_MSG_PEER_DISCONNECTED_STR "Peer is disconnected" +#define GD_MSG_QUOTA_GET_STAT_FAIL_STR "Failed to get quota status" +#define GD_MSG_SNAP_STATUS_FAIL_STR "Failed to get status of snapd" +#define GD_MSG_VALIDATE_FAILED_STR "Failed to validate volume" +#define GD_MSG_VOL_NOT_STARTED_STR "Volume is not started" +#define GD_MSG_VOL_SHD_NOT_COMP_STR "Volume is not Self-heal compatible" +#define GD_MSG_SELF_HEALD_DISABLED_STR "Self-heal daemon is disabled" +#define GD_MSG_NFS_GANESHA_DISABLED_STR "NFS server is disabled" +#define GD_MSG_QUOTA_DISABLED_STR "Quota is disabled" +#define GD_MSG_BITROT_NOT_RUNNING_STR "Bitrot is not 
enabled" +#define GD_MSG_BITROT_NOT_ENABLED_STR "Volume does not have bitrot enabled" +#define GD_MSG_SNAPD_NOT_RUNNING_STR "Snapd is not enabled" +#define GD_MSG_STRDUP_FAILED_STR "Strdup operation failed" +#define GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL_STR \ + "Failed to get quorum cluster counts" +#define GD_MSG_GLUSTER_SERVICE_START_FAIL_STR "Failed to start glusterd service" +#define GD_MSG_PEER_ADDRESS_GET_FAIL_STR "Failed to get the address of peer" +#define GD_MSG_INVALID_SLAVE_STR "Volume is not a slave volume" +#define GD_MSG_BRICK_NOT_RUNNING_STR "One or more bricks are not running" +#define GD_MSG_BRK_MNTPATH_GET_FAIL_STR "Failed to get brick mount device" +#define GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED_STR \ + "Snapshot is supported only for thin provisioned LV." +#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL_STR \ + "Failed to copy snapshot device name" +#define GD_MSG_SNAP_NOT_FOUND_STR "Snapshot does not exist" +#define GD_MSG_CREATE_BRICK_DIR_FAILED_STR "Failed to create brick directory" +#define GD_MSG_LSTAT_FAIL_STR "Lstat operation failed" +#define GD_MSG_DIR_OP_FAILED_STR \ + "The provided path is already present. It is not a directory" +#define GD_MSG_BRICK_CREATION_FAIL_STR \ + "Brick isn't allowed to be created inside glusterd's working directory." +#define GD_MSG_BRICK_CREATE_ROOT_STR \ + "The brick is being created in the root partition. It is recommended " \ + "that you don't use the system's root partition for storage backend." +#define GD_MSG_BRICK_CREATE_MNTPNT_STR \ + "The brick is a mount point. Please create a sub-directory under the " \ + "mount point and use that as the brick directory." +#define GD_MSG_CREATE_GLUSTER_DIR_FAILED_STR \ + "Failed to create glusterfs directory" +#define GD_MSG_VOLINFO_IMPORT_FAIL_STR "Volume is not yet imported" +#define GD_MSG_BRICK_SET_INFO_FAIL_STR \ + "Failed to add brick mount details to dict" +#define GD_MSG_SET_XATTR_BRICK_FAIL_STR \ + "Glusterfs is not supported on brick. 
Setting extended attribute failed" +#define GD_MSG_SET_XATTR_FAIL_STR "Failed to set extended attribute" +#define GD_MSG_REMOVE_XATTR_FAIL_STR "Failed to remove extended attribute" +#define GD_MSG_XLATOR_SET_OPT_FAIL_STR "Failed to set xlator type" +#define GD_MSG_XLATOR_LINK_FAIL_STR \ + "Failed to do the link of xlator with children" +#define GD_MSG_READ_ERROR_STR "Failed to read directory" +#define GD_MSG_INCOMPATIBLE_VALUE_STR "Incompatible transport type" +#define GD_MSG_VOL_STOP_ARGS_GET_FAILED_STR "Failed to get volume stop args" +#define GD_MSG_FRAME_CREATE_FAIL_STR "Failed to create frame" +#define GD_MSG_VOLUME_NOT_IMPORTED_STR "Volume has not been imported" +#define GD_MSG_ADD_BRICK_MNT_INFO_FAIL_STR \ + "Failed to add brick mount details to dict" +#define GD_MSG_GET_MNT_ENTRY_INFO_FAIL_STR "Failed to get mount entry details" +#define GD_MSG_BRICKPATH_ROOT_GET_FAIL_STR "failed to get brick root details" +#define GD_MSG_VOL_INFO_REQ_RECVD_STR "Received get volume info req" +#define GD_MSG_NO_FLAG_SET_STR "No flags set" +#define GD_MSG_CREATE_DIR_FAILED_STR "Failed to create directory" +#define GD_MSG_POST_HOOK_STUB_INIT_FAIL_STR \ + "Failed to initialize post hooks stub" +#define GD_MSG_FILE_OP_FAILED_STR "File operation failed" +#define GD_MSG_INODE_SIZE_GET_FAIL_STR "Failed to get inode size" +#define GD_MSG_CMD_EXEC_FAIL_STR "Command execution failed" +#define GD_MSG_XLATOR_CREATE_FAIL_STR "Failed to create xlator" +#define GD_MSG_CLRCLK_VOL_REQ_RCVD_STR "Received clear-locks request for volume" +#define GD_MSG_BRK_PORT_NUM_GET_FAIL_STR \ + "Couldn't get port number of local bricks" +#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL_STR \ + "Creating mount directory for clear-locks failed" +#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL_STR \ + "Failed to mount clear-locks maintenance client" +#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL_STR \ + "Failed to unmount clear-locks mount point" +#define GD_MSG_CLRCLK_SND_CMD_FAIL_STR "Failed to send command for clear-locks" 
+#define GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL_STR \ + "Failed to allocate memory or get serialized length of dict" +#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute" #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c index ef8b4c38571..1069688a89d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -165,6 +165,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req) ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); if (!ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -174,6 +175,7 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req) ctx->dict = dict_new(); if (!ctx->dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -181,8 +183,8 @@ glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req) ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len, &ctx->dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } @@ -264,8 +266,8 @@ glusterd_mgmt_v3_pre_validate_send_resp(rpcsvc_request_t *req, int32_t op, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } @@ -315,20 +317,21 @@ glusterd_handle_pre_validate_fn(rpcsvc_request_t *req) } dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_unserialize(op_req.dict.dict_val, 
op_req.dict.dict_len, &dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } rsp_dict = dict_new(); if (!rsp_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, - "Failed to get new dictionary"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); return -1; } @@ -391,8 +394,8 @@ glusterd_mgmt_v3_brick_op_send_resp(rpcsvc_request_t *req, int32_t op, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } @@ -441,20 +444,21 @@ glusterd_handle_brick_op_fn(rpcsvc_request_t *req) } dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } rsp_dict = dict_new(); if (!rsp_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, - "Failed to get new dictionary"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); return -1; } @@ -518,8 +522,8 @@ glusterd_mgmt_v3_commit_send_resp(rpcsvc_request_t *req, int32_t op, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); 
goto out; } @@ -569,20 +573,21 @@ glusterd_handle_commit_fn(rpcsvc_request_t *req) } dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } rsp_dict = dict_new(); if (!rsp_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, - "Failed to get new dictionary"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); return -1; } @@ -621,6 +626,136 @@ out: } static int +glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + uint32_t op_errno, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + rsp.op_errno = op_errno; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_commit_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + 
GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode post commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "post commit failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send post commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict) @@ -646,8 +781,8 @@ 
glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op, ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to get serialized length of dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } @@ -696,20 +831,21 @@ glusterd_handle_post_validate_fn(rpcsvc_request_t *req) } dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } rsp_dict = dict_new(); if (!rsp_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, - "Failed to get new dictionary"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); return -1; } @@ -867,6 +1003,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req) ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); if (!ctx) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -876,6 +1013,7 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req) ctx->dict = dict_new(); if (!ctx->dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -883,8 +1021,8 @@ glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req) ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len, &ctx->dict); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize the dictionary"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); goto out; } @@ -955,6 +1093,12 @@ 
glusterd_handle_commit(rpcsvc_request_t *req) } static int +glusterd_handle_post_commit(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn); +} + +static int glusterd_handle_post_validate(rpcsvc_request_t *req) { return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn); @@ -978,6 +1122,9 @@ static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0}, [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL, GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT", + glusterd_handle_post_commit, NULL, + GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0}, [GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL", glusterd_handle_post_validate, NULL, GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA, diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index bf9b5a870a0..bca7221062b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -86,6 +86,11 @@ gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno, peer_str, err_string); break; } + case GLUSTERD_MGMT_V3_POST_COMMIT: { + snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s", + peer_str, err_string); + break; + } case GLUSTERD_MGMT_V3_POST_VALIDATE: { snprintf(op_err, sizeof(op_err), "Post Validation failed on %s. 
%s", peer_str, @@ -187,6 +192,16 @@ gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, goto out; } break; + case GD_OP_REMOVE_BRICK: + ret = glusterd_op_stage_remove_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Remove brick prevalidation failed."); + goto out; + } + break; + case GD_OP_RESET_BRICK: ret = glusterd_reset_brick_prevalidate(dict, op_errstr, rsp_dict); if (ret) { @@ -337,6 +352,15 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, } break; } + case GD_OP_REMOVE_BRICK: { + ret = glusterd_op_remove_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Remove-brick commit failed."); + goto out; + } + break; + } case GD_OP_RESET_BRICK: { ret = glusterd_op_reset_brick(dict, rsp_dict); if (ret) { @@ -386,6 +410,47 @@ out: } int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + + switch (op) { + case GD_OP_ADD_BRICK: + ret = glusterd_post_commit_add_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Add-brick post commit failed."); + goto out; + } + break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_post_commit_replace_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Replace-brick post commit failed."); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "OP = %d. 
Returning %d", op, ret); + return ret; +} + +int32_t gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -582,15 +647,21 @@ gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK, @@ -759,6 +830,7 @@ glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp) goto out; } case GD_OP_STOP_VOLUME: + case GD_OP_REMOVE_BRICK: case GD_OP_PROFILE_VOLUME: case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_REBALANCE: @@ -897,15 +969,21 @@ gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request( peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, @@ -948,7 +1026,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, } if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME || - op == GD_OP_REBALANCE) { + op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) { ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 
0, GD_MSG_SERVER_QUORUM_NOT_MET, @@ -1076,6 +1154,7 @@ glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict, case GD_OP_START_VOLUME: case GD_OP_STOP_VOLUME: case GD_OP_ADD_BRICK: + case GD_OP_REMOVE_BRICK: case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_REPLACE_BRICK: case GD_OP_RESET_BRICK: @@ -1258,15 +1337,21 @@ gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP, @@ -1515,15 +1600,21 @@ gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT, @@ -1559,12 +1650,25 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, GF_ASSERT(op_errstr); GF_VALIDATE_OR_GOTO(this->name, op_errno, out); - if (op == GD_OP_REBALANCE || op == GD_OP_DEFRAG_BRICK_VOLUME) { - ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, - "Failed to set rebalance id in dict."); - } + switch (op) { + case 
GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + + ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set rebalance id in dict."); + } + break; + case GD_OP_REMOVE_BRICK: + ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set rebalance id for remove-brick in dict."); + } + break; + default: + break; } rsp_dict = dict_new(); if (!rsp_dict) { @@ -1662,6 +1766,274 @@ out: } int32_t +gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + free(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) 
+ op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref(rsp_dict); + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid); + GF_FREE(peerid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. + */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_post_commit_cbk_fn); +} + +int +gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request( + peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t 
*req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(op_ctx); + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Post commit on local node */ + ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Post commit failed " + "on localhost. 
Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending post commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + if (!peerinfo->connected) + continue; + + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent post commit req for %s to %d " + "peers. 
Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + glusterd_op_modify_op_ctx(op, op_ctx); + return ret; +} + +int32_t gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -1751,16 +2123,22 @@ gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; req.op_ret = op_ret; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request( peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, @@ -1967,15 +2345,21 @@ gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); req.op = op; GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); goto out; + } ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK, @@ -2338,6 +2722,15 @@ glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op, goto out; } + /* POST COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr, + &op_errno, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit Op Failed"); + goto out; + } + /* POST-COMMIT VALIDATE PHASE */ /* As of now, post_validate is not trying to cleanup any failed commands. 
So as of now, I am sending 0 (op_ret as 0). diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h index 71f793d0397..27dd1849519 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -28,6 +28,10 @@ gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, uint32_t *op_errno, dict_t *rsp_dict); int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict); + +int32_t gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict); @@ -84,4 +88,10 @@ glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict); + +int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr); + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr); #endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c index 9c4b2fb18cc..645d845ee76 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -81,6 +81,7 @@ parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc) mspec->patterns = GF_CALLOC(mspec->len, sizeof(*mspec->patterns), gf_gld_mt_mount_pattern); if (!mspec->patterns) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -261,8 +262,11 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user, int ret = 0; vols = gf_strdup((char *)volnames); - if (!vols) + if (!vols) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volume name=%s", volnames, NULL); goto out; + } for (vc = 1, p = vols; *p; p++) { if (*p == ',') @@ -270,8 +274,10 @@ make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user, } siz = 
strlen(volnames) + vc * SLEN("volfile-id="); meetspec = GF_CALLOC(1, siz + 1, gf_gld_mt_georep_meet_spec); - if (!meetspec) + if (!meetspec) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } for (p = vols;;) { vol = strtok_r(p, ",", &savetok); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 14915b3fc17..c537fc33a85 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -106,6 +106,7 @@ glusterd_txn_opinfo_dict_init() priv->glusterd_txn_opinfo = dict_new(); if (!priv->glusterd_txn_opinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -178,8 +179,10 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id) GF_ASSERT(dict); *txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); - if (!*txn_id) + if (!*txn_id) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } if (priv->op_version < GD_OP_VERSION_3_6_0) gf_uuid_copy(**txn_id, priv->global_txn_id); @@ -541,8 +544,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op, case GD_OP_STOP_VOLUME: brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_TERMINATE; brick_req->name = brickinfo->path; glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPING); @@ -551,8 +557,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op, brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_XLATOR_INFO; brick_req->name = brickinfo->path; @@ -561,45 +570,69 @@ glusterd_brick_op_build_payload(glusterd_op_t op, case GD_OP_HEAL_VOLUME: { brick_req = GF_CALLOC(1, 
sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_XLATOR_OP; brick_req->name = ""; ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), (int32_t *)&heal_op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=heal-op", NULL); goto out; + } ret = dict_set_int32n(dict, "xl-op", SLEN("xl-op"), heal_op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=xl-op", NULL); goto out; + } } break; case GD_OP_STATUS_VOLUME: { brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_STATUS; brick_req->name = ""; ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), brickinfo->path); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=brick-name", NULL); goto out; + } } break; case GD_OP_REBALANCE: case GD_OP_DEFRAG_BRICK_VOLUME: brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG; ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; + } ret = glusterd_volinfo_find(volname, &volinfo); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_VOLINFO_GET_FAIL, "Volume=%s", volname, NULL); goto out; + } snprintf(name, sizeof(name), "%s-dht", volname); brick_req->name = gf_strdup(name); @@ -608,8 +641,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op, case GD_OP_BARRIER: 
brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_BRICK_BARRIER; brick_req->name = brickinfo->path; break; @@ -623,8 +659,11 @@ glusterd_brick_op_build_payload(glusterd_op_t op, brick_req->dict.dict_val = NULL; ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, &brick_req->input.input_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } *req = brick_req; ret = 0; @@ -646,13 +685,19 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, GF_ASSERT(op < GD_OP_MAX); GF_ASSERT(op > GD_OP_NONE); GF_ASSERT(req); + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); switch (op) { case GD_OP_PROFILE_VOLUME: brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_NODE_PROFILE; brick_req->name = ""; @@ -662,8 +707,11 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, case GD_OP_STATUS_VOLUME: brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_NODE_STATUS; brick_req->name = ""; @@ -674,14 +722,20 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, case GD_OP_SCRUB_ONDEMAND: brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); - if (!brick_req) + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; + } brick_req->op = GLUSTERD_NODE_BITROT; ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (ret) + if (ret) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; + } brick_req->name = gf_strdup(volname); break; @@ -694,8 +748,11 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, &brick_req->input.input_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } *req = brick_req; ret = 0; @@ -703,7 +760,7 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, out: if (ret && brick_req) GF_FREE(brick_req); - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -719,12 +776,14 @@ glusterd_validate_quorum_options(xlator_t *this, char *fullkey, char *value, goto out; key = strchr(fullkey, '.'); if (key == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); ret = -1; goto out; } key++; opt = xlator_volume_option_get(this, key); if (!opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL); ret = -1; goto out; } @@ -988,8 +1047,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) if (check_op_version) { ret = dict_get_uint32(dict, "new-op-version", &new_op_version); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get new_op_version"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=new-op-version", NULL); goto out; } @@ -1043,8 +1102,8 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) ret = dict_get_str_sizen(dict, "volname", &volname); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volume name"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; } @@ -1069,14 +1128,19 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) } val_dict = 
dict_new(); - if (!val_dict) + if (!val_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } for (count = 1; ret != 1; count++) { keystr_len = sprintf(keystr, "key%d", count); ret = dict_get_strn(dict, keystr, keystr_len, &key); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", keystr, NULL); break; + } keystr_len = sprintf(keystr, "value%d", count); ret = dict_get_strn(dict, keystr, keystr_len, &value); @@ -1603,12 +1667,17 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr) 0, }; glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); if (ret) { snprintf(msg, sizeof(msg), "hostname couldn't be " "retrieved from msg"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); *op_errstr = gf_strdup(msg); goto out; } @@ -1623,6 +1692,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr) "Volume %s " "does not exist", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_FOUND, + "Volume=%s", volname, NULL); *op_errstr = gf_strdup(msg); goto out; } @@ -1635,6 +1706,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr) RCU_READ_UNLOCK; ret = -1; snprintf(msg, sizeof(msg), "%s, is not a friend", hostname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, + "Peer_name=%s", hostname, NULL); *op_errstr = gf_strdup(msg); goto out; @@ -1645,6 +1718,8 @@ glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr) "%s, is not connected at " "the moment", hostname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_DISCONNECTED, + "Peer_name=%s", hostname, NULL); *op_errstr = gf_strdup(msg); goto out; } @@ -1685,8 +1760,11 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) GF_ASSERT(priv); ret = dict_get_uint32(dict, "cmd", &cmd); - if (ret) + if 
(ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=cmd", NULL); goto out; + } if (cmd & GF_CLI_STATUS_ALL) goto out; @@ -1697,6 +1775,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "The cluster is operating at " "version 1. Getting the status of quotad is not " "allowed in this state."); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_GET_STAT_FAIL, + msg, NULL); ret = -1; goto out; } @@ -1708,6 +1788,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "version less than %d. Getting the " "status of snapd is not allowed in this state.", GD_OP_VERSION_3_6_0); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_STATUS_FAIL, msg, + NULL); ret = -1; goto out; } @@ -1722,17 +1804,23 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); ret = -1; goto out; } ret = glusterd_validate_volume_id(dict, volinfo); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VALIDATE_FAILED, NULL); goto out; + } ret = glusterd_is_volume_started(volinfo); if (!ret) { snprintf(msg, sizeof(msg), "Volume %s is not started", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_STARTED, + "Volume=%s", volname, NULL); ret = -1; goto out; } @@ -1746,12 +1834,16 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) ret = -1; snprintf(msg, sizeof(msg), "Volume %s is not Self-heal compatible", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_SHD_NOT_COMP, + "Volume=%s", volname, NULL); goto out; } if (!shd_enabled) { ret = -1; snprintf(msg, sizeof(msg), "Self-heal Daemon is disabled for volume %s", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SELF_HEALD_DISABLED, + "Volume=%s", volname, NULL); goto out; } #ifdef 
BUILD_GNFS @@ -1762,6 +1854,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) ret = -1; snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_NFS_GANESHA_DISABLED, "Volume=%s", volname, NULL); goto out; } #endif @@ -1772,6 +1866,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "Volume %s does not have " "quota enabled", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_DISABLED, + "Volume=%s", volname, NULL); goto out; } } else if ((cmd & GF_CLI_STATUS_BITD) != 0) { @@ -1781,6 +1877,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "Volume %s does not have " "bitrot enabled", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED, + "Volume=%s", volname, NULL); goto out; } } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) { @@ -1791,6 +1889,10 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "bitrot enabled. 
Scrubber will be enabled " "automatically if bitrot is enabled", volname); + gf_smsg( + this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED, + "Scrubber will be enabled automatically if bitrot is enabled", + "Volume=%s", volname, NULL); goto out; } } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { @@ -1800,12 +1902,17 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "Volume %s does not have " "uss enabled", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAPD_NOT_RUNNING, + "Volume=%s", volname, NULL); goto out; } } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); goto out; + } ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, _gf_false); @@ -1814,6 +1921,8 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) "No brick %s in" " volume %s", brick, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_FOUND, + "Brick=%s, Volume=%s", brick, volname, NULL); ret = -1; goto out; } @@ -2100,8 +2209,10 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict) ret = -1; dup_opt = dict_new(); - if (!dup_opt) + if (!dup_opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } if (!all) { dict_copy(conf->opts, dup_opt); dict_del(dup_opt, key); @@ -2112,8 +2223,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict) ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); goto out; + } ret = glusterd_store_options(this, dup_opt); if (ret) @@ -2124,9 +2238,11 @@ glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict) ret = dict_set_dynstrn(conf->opts, 
GLUSTERD_GLOBAL_OPT_VERSION, SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); goto out; - else + } else next_version = NULL; if (!all) { @@ -2410,8 +2526,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, conf = this->private; ret = dict_get_strn(dict, "key1", SLEN("key1"), &key); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=key1", NULL); goto out; + } ret = dict_get_strn(dict, "value1", SLEN("value1"), &value); if (ret) { @@ -2530,12 +2649,17 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, } ret = -1; dup_opt = dict_new(); - if (!dup_opt) + if (!dup_opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } dict_copy(conf->opts, dup_opt); ret = dict_set_str(dup_opt, key, value); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version); if (ret) @@ -2543,8 +2667,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); goto out; + } ret = glusterd_store_options(this, dup_opt); if (ret) @@ -2555,9 +2682,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); goto out; - else + } else next_version = NULL; dup_value = gf_strdup(value); @@ 
-2565,9 +2694,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, goto out; ret = dict_set_dynstr(conf->opts, key, dup_value); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; - else + } else dup_value = NULL; /* Protect the allocation from GF_FREE */ out: @@ -3002,6 +3133,8 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) snprintf(msg, sizeof(msg), "hostname couldn't be " "retrieved from msg"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); *op_errstr = gf_strdup(msg); goto out; } @@ -3026,6 +3159,7 @@ glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) if (!rsp_dict) { // this should happen only on source + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = 0; goto out; } @@ -3582,27 +3716,30 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) } ret = dict_set_int32n(rsp_dict, "type", SLEN("type"), volinfo->type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=type", NULL); goto out; + } ret = dict_set_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"), brick_index); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Error setting brick-index-max to dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=brick-index-max", NULL); goto out; } ret = dict_set_int32n(rsp_dict, "other-count", SLEN("other-count"), other_count); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Error setting other-count to dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=other-count", NULL); goto out; } ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), node_count); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Error setting node count to dict"); + 
gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; } @@ -4079,8 +4216,10 @@ glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr) this = THIS; GF_ASSERT(this); - if (!dict || !volname) + if (!dict || !volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { @@ -5838,13 +5977,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, static void glusterd_wait_for_blockers(glusterd_conf_t *priv) { - uint64_t blockers = GF_ATOMIC_GET(priv->blockers); - - while (blockers) { - synclock_unlock(&priv->big_lock); - sleep(1); - blockers = GF_ATOMIC_GET(priv->blockers); - synclock_lock(&priv->big_lock); + while (GF_ATOMIC_GET(priv->blockers)) { + synccond_wait(&priv->cond_blockers, &priv->big_lock); } } @@ -6479,6 +6613,10 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo = NULL; int hxl_children = 0; uuid_t candidate = {0}; + int brick_index = 0; + glusterd_peerinfo_t *peerinfo = NULL; + int delta = 0; + uuid_t candidate_max = {0}; if ((*index) == 0) (*index)++; @@ -6490,13 +6628,40 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo, cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) { + if (gf_uuid_compare(brickinfo->uuid, candidate_max) > 0) { + if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) { + gf_uuid_copy(candidate_max, brickinfo->uuid); + } else { + peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL); + if (peerinfo && peerinfo->connected) { + gf_uuid_copy(candidate_max, brickinfo->uuid); + } + } + } + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { if (gf_uuid_is_null(brickinfo->uuid)) (void)glusterd_resolve_brick(brickinfo); - if (gf_uuid_compare(brickinfo->uuid, candidate) > 0) - gf_uuid_copy(candidate, brickinfo->uuid); + delta %= 
hxl_children; + if ((*index + delta) == (brick_index + hxl_children)) { + if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) { + gf_uuid_copy(candidate, brickinfo->uuid); + } else { + peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL); + if (peerinfo && peerinfo->connected) { + gf_uuid_copy(candidate, brickinfo->uuid); + } else if (peerinfo && + (!gf_uuid_compare(candidate_max, MY_UUID))) { + _add_hxlator_to_dict(dict, volinfo, + ((*index) - 1) / hxl_children, + (*hxlator_count)); + (*hxlator_count)++; + } + } - if ((*index) % hxl_children == 0) { if (!gf_uuid_compare(MY_UUID, candidate)) { _add_hxlator_to_dict(dict, volinfo, ((*index) - 1) / hxl_children, @@ -6504,6 +6669,8 @@ _select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo, (*hxlator_count)++; } gf_uuid_clear(candidate); + brick_index += hxl_children; + delta++; } (*index)++; diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c index 82acf5bf03c..18d355cb186 100644 --- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c @@ -367,8 +367,10 @@ glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid, GF_ASSERT(conf); new_peer = GF_CALLOC(1, sizeof(*new_peer), gf_gld_mt_peerinfo_t); - if (!new_peer) + if (!new_peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } CDS_INIT_LIST_HEAD(&new_peer->uuid_list); @@ -564,12 +566,16 @@ glusterd_peer_hostname_new(const char *hostname, GF_ASSERT(hostname); GF_ASSERT(name); + xlator_t *this = THIS; + GF_ASSERT(this); peer_hostname = GF_CALLOC(1, sizeof(*peer_hostname), gf_gld_mt_peer_hostname_t); - if (!peer_hostname) + if (!peer_hostname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } peer_hostname->hostname = gf_strdup(hostname); CDS_INIT_LIST_HEAD(&peer_hostname->hostname_list); @@ -900,8 +906,11 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t 
*peerinfo, dict_t *dict, { snprintf(key, sizeof(key), "%s.hostname%d", prefix, count); ret = dict_set_dynstr_with_alloc(dict, key, addr->hostname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } count++; } @@ -923,41 +932,61 @@ gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends, int keylen; char *peer_uuid_str = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); GF_ASSERT(peerinfo); GF_ASSERT(friends); peer_uuid_str = gd_peer_uuid_str(peerinfo); keylen = snprintf(key, sizeof(key), "friend%d.uuid", count); ret = dict_set_strn(friends, key, keylen, peer_uuid_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.hostname", count); ret = dict_set_strn(friends, key, keylen, peerinfo->hostname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.port", count); ret = dict_set_int32n(friends, key, keylen, peerinfo->port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.stateId", count); ret = dict_set_int32n(friends, key, keylen, peerinfo->state.state); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s in dict", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.state", count); ret = dict_set_strn( friends, key, keylen, glusterd_friend_sm_state_name_get(peerinfo->state.state)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "friend%d.connected", count); ret = dict_set_int32n(friends, key, keylen, (int32_t)peerinfo->connected); - if (ret) 
+ if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } snprintf(key, sizeof(key), "friend%d", count); ret = gd_add_peer_hostnames_to_dict(peerinfo, friends, key); diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index ec5bd1137f1..16ac628ab82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -433,17 +433,20 @@ __gluster_pmap_portbybrick(rpcsvc_request_t *req) char *brick = NULL; int port = 0; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_port_by_brick_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto fail; } brick = args.brick; - port = pmap_registry_search(THIS, brick, GF_PMAP_PORT_BRICKSERVER, + port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER, _gf_false); if (!port) @@ -475,11 +478,14 @@ __gluster_pmap_brickbyport(rpcsvc_request_t *req) 0, }; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_brick_by_port_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto fail; } @@ -513,10 +519,13 @@ __gluster_pmap_signin(rpcsvc_request_t *req) }; int ret = -1; glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto fail; } @@ -570,6 +579,7 @@ __gluster_pmap_signout(rpcsvc_request_t *req) if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto fail; } rsp.op_ret = pmap_registry_remove(THIS, args.port, 
args.brick, diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c index d96adcae89e..a05c90d7b10 100644 --- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c @@ -114,7 +114,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) goto out; synclock_unlock(&conf->big_lock); - sleep(1); + synctask_sleep(1); synclock_lock(&conf->big_lock); if (gf_is_service_running(proc->pidfile, &pid)) { ret = kill(pid, SIGKILL); diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index cb2d9c7c384..8370c174ce3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -478,8 +478,9 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv, if (dir == NULL) return; - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; len = snprintf(pidfile, sizeof(pidfile), "%s/%s", pid_dir, entry->d_name); if ((len >= 0) && (len < sizeof(pidfile))) { @@ -487,8 +488,6 @@ glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv, _gf_true); sys_unlink(pidfile); } - - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } sys_closedir(dir); } @@ -1900,10 +1899,9 @@ glusterd_get_gfid_from_brick(dict_t *dict, glusterd_volinfo_t *volinfo, } ret = sys_lgetxattr(backend_path, GFID_XATTR_KEY, gfid, 16); if (ret < 0) { - gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_SETXATTR_FAIL, - "Failed to get " - "extended attribute %s for directory %s. 
", - GFID_XATTR_KEY, backend_path); + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_XATTR_FAIL, + "Attribute=%s, Directory=%s", GFID_XATTR_KEY, backend_path, + NULL); ret = 0; continue; } diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c index fc0aaddcbe3..f26d832a06d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c @@ -127,8 +127,10 @@ glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags) char *options[] = {svc->name, "--process-name", NULL}; cmdline = dict_new(); - if (!cmdline) + if (!cmdline) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } for (i = 0; options[i]; i++) { ret = snprintf(key, sizeof(key), "arg%d", i); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 4ce20a9e592..458bf168ede 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -219,6 +219,9 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, char valgrind_logfile[PATH_MAX] = { 0, }; + char msg[1024] = { + 0, + }; char *volfileserver = NULL; char *localtime_logging = NULL; @@ -270,12 +273,17 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, "rebalance"); runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", priv->logdir, volinfo->volname); - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", 
valgrind_logfile); } @@ -316,6 +324,10 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, runner_add_arg(&runner, "--localtime-logging"); } + snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run_nowait(&runner); if (ret) { gf_msg_debug("glusterd", 0, "rebalance command failed"); @@ -390,8 +402,10 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) goto out; options = dict_new(); - if (!options) + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); @@ -497,6 +511,7 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req) if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 3d13ef95ffd..43c2f4373e0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -239,6 +239,8 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr, msg[0] = '\0'; } + glusterd_add_peers_to_auth_list(volname); + ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr, &dst_brickinfo, &host, dict, &dup_dstbrick); diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index a8e35f32a15..88662e3bbae 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -183,10 +183,8 @@ glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret, ret = dict_allocate_and_serialize(ctx, &rsp.dict.dict_val, &rsp.dict.dict_len); if (ret < 0) - gf_msg(this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "failed to " - "serialize buffer"); + 
gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); else free_ptr = rsp.dict.dict_val; } @@ -1464,6 +1462,7 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data) dict_t *dict = NULL; if (!frame || !this || !data) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = -1; goto out; } @@ -1473,15 +1472,24 @@ glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data) GF_ASSERT(priv); ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); goto out; + } ret = dict_get_int32n(dict, "port", SLEN("port"), &port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_DEBUG, errno, GD_MSG_DICT_GET_FAILED, + "Key=port", NULL); port = GF_DEFAULT_BASE_PORT; + } ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); goto out; + } gf_uuid_copy(req.uuid, MY_UUID); req.hostname = gf_strdup(hostname); @@ -1510,6 +1518,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) dict_t *peer_data = NULL; if (!frame || !this || !data) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = -1; goto out; } @@ -1540,6 +1549,8 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) peer_data = dict_new(); if (!peer_data) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); errno = ENOMEM; goto out; } @@ -1585,8 +1596,11 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) if (!req.vols.vols_len) { ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, &req.vols.vols_len); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } } ret = 
glusterd_submit_request( @@ -1760,8 +1774,11 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data) GF_ASSERT(priv); ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); goto out; + } // peerinfo should not be in payload dict_deln(dict, "peerinfo", SLEN("peerinfo")); @@ -1771,9 +1788,8 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data) ret = dict_allocate_and_serialize(dict, &req.dict.dict_val, &req.dict.dict_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict " - "to request buffer"); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } @@ -1797,6 +1813,7 @@ glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data) } frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -1836,8 +1853,11 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data) GF_ASSERT(priv); ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); goto out; + } // peerinfo should not be in payload dict_deln(dict, "peerinfo", SLEN("peerinfo")); @@ -1847,9 +1867,8 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data) ret = dict_allocate_and_serialize(dict, &req.dict.dict_val, &req.dict.dict_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict " - "to request buffer"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } @@ -1873,6 +1892,7 @@ glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, 
xlator_t *this, void *data) } frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -1954,8 +1974,11 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data) GF_ASSERT(priv); ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); goto out; + } // peerinfo should not be in payload dict_deln(dict, "peerinfo", SLEN("peerinfo")); @@ -1965,9 +1988,8 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data) ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict " - "to request buffer"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } /* Sending valid transaction ID to peers */ @@ -1989,6 +2011,7 @@ glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data) } frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } @@ -2030,8 +2053,11 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data) GF_ASSERT(priv); ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); goto out; + } // peerinfo should not be in payload dict_deln(dict, "peerinfo", SLEN("peerinfo")); @@ -2041,9 +2067,8 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data) ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict to " - "request buffer"); + 
gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; } /* Sending valid transaction ID to peers */ @@ -2065,6 +2090,7 @@ glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data) } frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -1; goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c index eab9078eb8e..c49a0eefba5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c @@ -117,8 +117,10 @@ glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags) dict_t *cmdict = NULL; cmdict = dict_new(); - if (!cmdict) + if (!cmdict) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto error_return; + } ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel"); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c index f3781879d99..b0b8a2e4018 100644 --- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c @@ -89,12 +89,15 @@ glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict, ret = dict_get_str(dict, "volname", &volname); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); ret = 0; goto out; } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL); ret = 0; goto out; } @@ -252,8 +255,11 @@ glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo) int ret = 0; ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type); - if (ret) + if (ret) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL); goto 
out; + } if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0) res = _gf_true; @@ -287,8 +293,11 @@ does_gd_meet_server_quorum(xlator_t *this) ret = glusterd_get_quorum_cluster_counts(this, &active_count, &quorum_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL); goto out; + } if (!does_quorum_meet(active_count, quorum_count)) { goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c index e106398e697..1c56384a14b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c @@ -155,6 +155,8 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) int ret = -1; dict_t *mod_dict = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX); if (!glusterd_is_shd_compatible_volume(volinfo)) { @@ -166,28 +168,42 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) goto out; } mod_dict = dict_new(); - if (!mod_dict) + if (!mod_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.background-self-heal-count", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.data-self-heal", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.metadata-self-heal", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + 
"Key=cluster.entry-self-heal", NULL); goto out; + } ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, "Failed to create volfile"); goto out; } @@ -195,7 +211,7 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) out: if (mod_dict) dict_unref(mod_dict); - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -270,9 +286,7 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) } while (conf->restart_shd) { - synclock_unlock(&conf->big_lock); - sleep(2); - synclock_lock(&conf->big_lock); + synccond_wait(&conf->cond_restart_shd, &conf->big_lock); } conf->restart_shd = _gf_true; shd_restart = _gf_true; @@ -328,8 +342,10 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) } } out: - if (shd_restart) + if (shd_restart) { conf->restart_shd = _gf_false; + synccond_broadcast(&conf->cond_restart_shd); + } if (volinfo) glusterd_volinfo_unref(volinfo); if (ret) @@ -346,6 +362,8 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) char glusterd_uuid_option[PATH_MAX] = {0}; char client_pid[32] = {0}; dict_t *cmdline = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); cmdline = dict_new(); if (!cmdline) @@ -362,31 +380,49 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) goto out; ret = dict_set_str(cmdline, "arg", client_pid); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg", NULL); goto out; + } /* Pass cmdline arguments as key-value pair. The key is merely * a carrier and is not used. 
Since dictionary follows LIFO the value * should be put in reverse order*/ ret = dict_set_str(cmdline, "arg4", svc->name); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg4", NULL); goto out; + } ret = dict_set_str(cmdline, "arg3", GD_SHD_PROCESS_NAME); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg3", NULL); goto out; + } ret = dict_set_str(cmdline, "arg2", glusterd_uuid_option); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg2", NULL); goto out; + } ret = dict_set_str(cmdline, "arg1", "--xlator-option"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg1", NULL); goto out; + } ret = glusterd_svc_start(svc, flags, cmdline); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_GLUSTER_SERVICE_START_FAIL, NULL); goto out; + } ret = glusterd_conn_connect(&(svc->conn)); out: @@ -539,28 +575,45 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) goto out; } mod_dict = dict_new(); - if (!mod_dict) + if (!mod_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.background-self-heal-count", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.data-self-heal", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.metadata-self-heal", NULL); goto out; + } ret = dict_set_int32(mod_dict, "graph-check", 1); - if (ret) + if (ret) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=graph-check", NULL); goto out; + } ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.entry-self-heal", NULL); goto out; + } ret = glusterd_volume_svc_check_volfile_identical( "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 7b67592e27c..bf2d81b644a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -146,22 +146,33 @@ glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid) ctx.op = GD_FRIEND_UPDATE_DEL; friends = dict_new(); - if (!friends) + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "op"); ret = dict_set_int32n(friends, key, keylen, ctx.op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "hostname"); ret = dict_set_strn(friends, key, keylen, hostname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = dict_set_int32n(friends, "count", SLEN("count"), count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } RCU_READ_LOCK; cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) @@ -370,30 +381,45 @@ glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx) peerinfo = glusterd_peerinfo_find(NULL, probe_ctx->hostname); if (peerinfo == NULL) { // We should not reach this state ideally + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); ret = -1; goto unlock; } - if (!peerinfo->peer) + if 
(!peerinfo->peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL, + NULL); goto unlock; + } proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY]; if (proc->fn) { frame = create_frame(this, this->ctx->pool); if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto unlock; } frame->local = ctx; dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto unlock; + } ret = dict_set_strn(dict, "hostname", SLEN("hostname"), probe_ctx->hostname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=hostname", NULL); goto unlock; + } ret = dict_set_int32n(dict, "port", SLEN("port"), probe_ctx->port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=port", NULL); goto unlock; + } /* The peerinfo reference being set here is going to be used * only within this critical section, in glusterd_rpc_probe @@ -482,12 +508,17 @@ glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event, goto unlock; } - if (!peerinfo->peer) + if (!peerinfo->peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL, + NULL); goto unlock; + } proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE]; if (proc->fn) { frame = create_frame(this, this->ctx->pool); if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto unlock; } frame->local = data; @@ -556,13 +587,18 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx) goto out; } - if (!friends) + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto unlock; + } ev_ctx.op = GD_FRIEND_UPDATE_ADD; ret = dict_set_int32n(friends, key, keylen, ev_ctx.op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto unlock; + } 
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) { @@ -578,8 +614,11 @@ glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx) } ret = dict_set_int32n(friends, "count", SLEN("count"), count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto unlock; + } cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) { @@ -665,13 +704,18 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx) goto unlock; } - if (!friends) + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ev_ctx.op = GD_FRIEND_UPDATE_ADD; ret = dict_set_int32n(friends, key, keylen, ev_ctx.op); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto unlock; + } cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) { @@ -687,8 +731,11 @@ glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx) } ret = dict_set_int32n(friends, "count", SLEN("count"), count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto unlock; + } ret = dict_set_static_ptr(friends, "peerinfo", cur_peerinfo); if (ret) { @@ -1062,6 +1109,7 @@ glusterd_friend_sm_transition_state(uuid_t peerid, char *peername, RCU_READ_LOCK; peerinfo = glusterd_peerinfo_find(peerid, peername); if (!peerinfo) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c index 3042789916c..d75f249b29e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -87,8 +87,10 @@ glusterd_snapdsvc_init(void *data) svc = &(volinfo->snapd.svc); ret = snprintf(svc->name, sizeof(svc->name), "%s", snapd_svc_name); - if (ret < 0) + if (ret < 0) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } notify = glusterd_snapdsvc_rpc_notify; @@ -115,6 +117,7 @@ glusterd_snapdsvc_init(void *data) glusterd_svc_build_snapd_logfile(logfile, logdir, sizeof(logfile)); len = snprintf(volfileid, sizeof(volfileid), "snapd/%s", volinfo->volname); if ((len < 0) || (len >= sizeof(volfileid))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -301,16 +304,22 @@ glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags) } runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log", svc->proc.logdir); if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index 43735d33fee..995268b796d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -282,12 +282,10 @@ glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict, new_volinfo->volume_id, sizeof(new_volinfo->volume_id), XATTR_REPLACE); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL, - "Failed to " - "set extended attribute %s on %s. 
" - "Reason: %s, snap: %s", - GF_XATTR_VOL_ID_KEY, new_brickinfo->path, - strerror(errno), new_volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Path=%s, Reason=%s, Snap=%s", + GF_XATTR_VOL_ID_KEY, new_brickinfo->path, + strerror(errno), new_volinfo->volname, NULL); goto out; } } @@ -1961,9 +1959,7 @@ glusterd_update_snaps_synctask(void *opaque) synclock_lock(&conf->big_lock); while (conf->restart_bricks) { - synclock_unlock(&conf->big_lock); - sleep(2); - synclock_lock(&conf->big_lock); + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); } conf->restart_bricks = _gf_true; @@ -2041,8 +2037,9 @@ glusterd_update_snaps_synctask(void *opaque) "Failed to remove snap %s", snap->snapname); goto out; } - if (dict) - dict_unref(dict); + + dict_unref(dict); + dict = NULL; } snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); ret = dict_get_int32(peer_data, buf, &val); @@ -2070,6 +2067,7 @@ out: if (dict) dict_unref(dict); conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); return ret; } @@ -2149,18 +2147,27 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, snprintf(base_key, sizeof(base_key), "brick%d", count); snprintf(key, sizeof(key), "%s.hostname", base_key); ret = dict_set_str(dict, key, "Snapshot Daemon"); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } snprintf(key, sizeof(key), "%s.path", base_key); ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(MY_UUID))); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } snprintf(key, sizeof(key), "%s.port", base_key); ret = dict_set_int32(dict, key, volinfo->snapd.port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } glusterd_svc_build_snapd_pidfile(volinfo, pidfile, 
sizeof(pidfile)); @@ -2170,8 +2177,11 @@ glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, snprintf(key, sizeof(key), "%s.pid", base_key); ret = dict_set_int32(dict, key, pid); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } snprintf(key, sizeof(key), "%s.status", base_key); ret = dict_set_int32(dict, key, brick_online); @@ -2672,8 +2682,10 @@ glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo) new_missed_snapinfo = GF_CALLOC(1, sizeof(*new_missed_snapinfo), gf_gld_mt_missed_snapinfo_t); - if (!new_missed_snapinfo) + if (!new_missed_snapinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } CDS_INIT_LIST_HEAD(&new_missed_snapinfo->missed_snaps); CDS_INIT_LIST_HEAD(&new_missed_snapinfo->snap_ops); @@ -2701,8 +2713,10 @@ glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op) new_snap_op = GF_CALLOC(1, sizeof(*new_snap_op), gf_gld_mt_missed_snapinfo_t); - if (!new_snap_op) + if (!new_snap_op) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } new_snap_op->brick_num = -1; new_snap_op->op = -1; @@ -3594,13 +3608,17 @@ glusterd_copy_folder(const char *source, const char *destination) continue; ret = snprintf(src_path, sizeof(src_path), "%s/%s", source, entry->d_name); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", destination, entry->d_name); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = glusterd_copy_file(src_path, dest_path); if (ret) { @@ -3756,8 +3774,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol, GLUSTERD_GET_VOLUME_DIR(dest_dir, dest_vol, priv); ret = snprintf(src_path, sizeof(src_path), "%s/quota.conf", src_dir); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, 
GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } /* quota.conf is not present if quota is not enabled, Hence ignoring * the absence of this file @@ -3770,8 +3790,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol, } ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.conf", dest_dir); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = glusterd_copy_file(src_path, dest_path); if (ret) { @@ -3795,8 +3817,10 @@ glusterd_copy_quota_files(glusterd_volinfo_t *src_vol, } ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.cksum", dest_dir); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = glusterd_copy_file(src_path, dest_path); if (ret) { @@ -4066,8 +4090,10 @@ glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol, ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir, snap->snapname); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = sys_lstat(src_path, &stbuf); if (ret) { @@ -4082,8 +4108,10 @@ glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol, ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf", GANESHA_EXPORT_DIRECTORY, src_vol->volname); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; + } ret = glusterd_copy_file(src_path, dest_path); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 4703a072294..aeaa8d15214 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -514,6 +514,7 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol) ret = snprintf(georep_session_dir, sizeof(georep_session_dir), "%s/%s/%s", priv->workdir, GEOREP, session); if (ret < 0) { /* Negative 
value is an error */ + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -521,6 +522,7 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol) priv->workdir, GLUSTERD_VOL_SNAP_DIR_PREFIX, snap_vol->snapshot->snapname, GEOREP, session); if (ret < 0) { /* Negative value is an error */ + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -568,12 +570,14 @@ glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol) ret = snprintf(src_path, sizeof(src_path), "%s/%s", georep_session_dir, files[i]->d_name); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", snap_session_dir, files[i]->d_name); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -632,12 +636,14 @@ glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo) "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir, volinfo->volname); if ((len < 0) || (len >= sizeof(delete_path))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH, priv->workdir); - if ((len < 0) || (len >= sizeof(delete_path))) { + if ((len < 0) || (len >= sizeof(trashdir))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -730,6 +736,7 @@ glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol, * is slave volume. 
*/ if (!origin_vol->gsync_slaves) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_SLAVE, NULL); ret = 0; goto out; } @@ -1418,6 +1425,8 @@ glusterd_handle_snapshot_config(rpcsvc_request_t *req, glusterd_op_t op, &config_command); if (ret) { snprintf(err_str, len, "Failed to get config-command type"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=config-command", NULL); goto out; } @@ -1976,6 +1985,13 @@ glusterd_snap_create_clone_common_prevalidate( "command or use [force] option in " "snapshot create to override this " "behavior."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_NOT_RUNNING, + "Please run volume status command to see brick " + "status.Please start the stopped brick and then issue " + "snapshot create command or use 'force' option in " + "snapshot create to override this behavior.", + NULL); } else { snprintf(err_str, PATH_MAX, "One or more bricks are not running. " @@ -1984,6 +2000,12 @@ glusterd_snap_create_clone_common_prevalidate( "Please start the stopped brick " "and then issue snapshot clone " "command "); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_NOT_RUNNING, + "Please run snapshot status command to see brick " + "status. 
Please start the stopped brick and then issue " + "snapshot clone command.", + NULL); } *op_errno = EG_BRCKDWN; ret = -1; @@ -1999,6 +2021,10 @@ glusterd_snap_create_clone_common_prevalidate( if (len < 0) { strcpy(err_str, "<error>"); } + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRK_MNTPATH_GET_FAIL, + "Brick_hostname=%s, Brick_path=%s", brickinfo->hostname, + brickinfo->path, NULL); ret = -1; goto out; } @@ -2010,6 +2036,11 @@ glusterd_snap_create_clone_common_prevalidate( "all bricks of %s are thinly " "provisioned LV.", volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, + "Ensure that all bricks of volume are thinly " + "provisioned LV, Volume=%s", + volinfo->volname, NULL); ret = -1; goto out; } @@ -2022,6 +2053,9 @@ glusterd_snap_create_clone_common_prevalidate( "cannot copy the snapshot device " "name (volname: %s, snapname: %s)", volinfo->volname, snapname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, "Volname=%s, Snapname=%s", + volinfo->volname, snapname, NULL); *loglevel = GF_LOG_WARNING; ret = -1; goto out; @@ -2188,6 +2222,16 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, goto out; } + if (!glusterd_is_volume_started(snap_vol)) { + snprintf(err_str, sizeof(err_str), + "Snapshot %s is " + "not activated", + snap->snapname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_VOLSTP; + goto out; + } + ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, @@ -3211,7 +3255,7 @@ glusterd_snapshot_get_snap_detail(dict_t *dict, glusterd_snap_t *snap, int volcount = 0; char key[32] = ""; /* keyprefix is quite small, up to 16 bytes */ int keylen; - char timestr[64] = ""; + char timestr[GF_TIMESTR_SIZE] = ""; char *value = NULL; glusterd_volinfo_t *snap_vol = NULL; glusterd_volinfo_t *tmp_vol = NULL; @@ -3886,7 +3930,8 @@ glusterd_handle_snapshot_create(rpcsvc_request_t 
*req, glusterd_op_t op, goto out; } - ret = dict_set_int64(dict, "snap-time", (int64_t)time(&snap_time)); + snap_time = gf_time(); + ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Unable to set snap-time"); @@ -4451,6 +4496,7 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict, snap_uuid, snap_vol->volname, brick_number, brickinfo->path, op, GD_MISSED_SNAP_PENDING); if ((len < 0) || (len >= sizeof(missed_snap_entry))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -4458,6 +4504,8 @@ glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict, ret = dict_get_int32n(rsp_dict, "missed_snap_count", SLEN("missed_snap_count"), &missed_snap_count); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=missed_snap_count", NULL); /* Initialize the missed_snap_count for the first time */ missed_snap_count = 0; } @@ -4647,7 +4695,7 @@ glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo, ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, snap_volinfo->volume_id, 16, XATTR_REPLACE); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL, + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, "Failed to set " "extended attribute %s on %s. 
Reason: " "%s, snap: %s", @@ -5275,6 +5323,48 @@ glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier")); gd_update_volume_op_versions(snap_vol); + /* * + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + goto out; + } + } + + ret = dict_set_dynstr_with_alloc(snap_vol->dict, + "features.cache-invalidation", "on"); + ret = gd_ganesha_send_dbus(clonename, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + goto out; + } + } + if (!glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + /* This happens when a snapshot was created when Ganesha was + * enabled globally. Then Ganesha disabled from the cluster. + * In such cases, we will have the volume level option set + * on dict, So we have to disable it as it doesn't make sense + * to keep the option. 
+ */ + + ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off"); + if (ret) + goto out; + } + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, @@ -5346,8 +5436,31 @@ out: for (i = 0; unsupported_opt[i].key; i++) GF_FREE(unsupported_opt[i].value); - if (snap_vol) + if (snap_vol) { + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + } + } + + ret = gd_ganesha_send_dbus(clonename, "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + } + } + glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true); + } snap_vol = NULL; } @@ -5399,6 +5512,8 @@ glusterd_snapshot_activate_deactivate_prevalidate(dict_t *dict, "Snapshot (%s) does not " "exist.", snapname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Snapname=%s", snapname, NULL); *op_errno = EG_NOSNAP; ret = -1; goto out; @@ -7204,11 +7319,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict, if (token != NULL) { value = gf_strdup(token); if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "token=%s", token, NULL); ret = -1; goto end; } ret = snprintf(key, sizeof(key), "%s.data", key_prefix); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); goto end; } @@ -7223,11 +7342,15 @@ glusterd_get_brick_lvm_details(dict_t *rsp_dict, if (token != NULL) { value = gf_strdup(token); if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "token=%s", token, NULL); ret = -1; goto end; } ret = 
snprintf(key, sizeof(key), "%s.lvsize", key_prefix); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); goto end; } @@ -7287,6 +7410,7 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict, keylen = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, index); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -7294,11 +7418,14 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict, ret = snprintf(brick_path, sizeof(brick_path), "%s:%s", brickinfo->hostname, brickinfo->path); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; } value = gf_strdup(brick_path); if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "brick_path=%s", brick_path, NULL); ret = -1; goto out; } @@ -7374,6 +7501,8 @@ glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict, index); if (keylen < 0) { ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); goto out; } @@ -7459,6 +7588,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict, { keylen = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -7482,6 +7612,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict, } keylen = snprintf(brickkey, sizeof(brickkey), "%s.brickcount", key); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -7496,6 +7627,7 @@ glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict, keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -7535,6 +7667,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict, */ keylen = snprintf(key, 
sizeof(key), "%s.snapname", keyprefix); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -7556,6 +7689,7 @@ glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict, keylen = snprintf(key, sizeof(key), "%s.uuid", keyprefix); if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -7639,6 +7773,7 @@ glusterd_get_snap_status_of_volume(char **op_errstr, dict_t *rsp_dict, { ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -7690,6 +7825,7 @@ glusterd_get_all_snapshot_status(dict_t *dict, char **op_errstr, { ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i); if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -8733,6 +8869,7 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo) "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir, volinfo->volname); if ((len < 0) || (len >= sizeof(trash_path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -8793,12 +8930,10 @@ glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo) snap_vol->volume_id, sizeof(snap_vol->volume_id), XATTR_REPLACE); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SETXATTR_FAIL, - "Failed to set extended " - "attribute %s on %s. 
" - "Reason: %s, snap: %s", - GF_XATTR_VOL_ID_KEY, brickinfo->path, - strerror(errno), snap_vol->volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Path=%s, Reason=%s, Snap=%s", + GF_XATTR_VOL_ID_KEY, brickinfo->path, + strerror(errno), snap_vol->volname, NULL); goto out; } } @@ -9178,6 +9313,7 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 458df8dbd1d..d94dceb10b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -74,7 +74,7 @@ glusterd_replace_slash_with_hyphen(char *str) while (ptr) { *ptr = '-'; - ptr = strchr(str, '/'); + ptr = strchr(ptr, '/'); } } @@ -660,85 +660,72 @@ out: } static int -_storeslaves(dict_t *this, char *key, data_t *value, void *data) -{ - int32_t ret = 0; - gf_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; - - xl = THIS; - GF_ASSERT(xl); - - shandle = (gf_store_handle_t *)data; - - GF_ASSERT(shandle); - GF_ASSERT(shandle->fd > 0); - GF_ASSERT(shandle->path); - GF_ASSERT(key); - GF_ASSERT(value); - GF_ASSERT(value->data); - - gf_msg_debug(xl->name, 0, "Storing in volinfo:key= %s, val=%s", key, - value->data); - - ret = gf_store_save_value(shandle->fd, key, (char *)value->data); - if (ret) { - gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store" - " handle for path: %s", - shandle->path); - return -1; - } - return 0; -} - -int -_storeopts(dict_t *this, char *key, data_t *value, void *data) +_storeopts(dict_t *dict_value, char *key, data_t *value, void *data) { int32_t ret = 0; int32_t exists = 0; + int32_t option_len = 0; gf_store_handle_t *shandle = NULL; - xlator_t *xl = NULL; + 
glusterd_volinfo_data_store_t *dict_data = NULL; + xlator_t *this = NULL; - xl = THIS; - GF_ASSERT(xl); + this = THIS; + GF_ASSERT(this); - shandle = (gf_store_handle_t *)data; + dict_data = (glusterd_volinfo_data_store_t *)data; + shandle = dict_data->shandle; GF_ASSERT(shandle); GF_ASSERT(shandle->fd > 0); - GF_ASSERT(shandle->path); GF_ASSERT(key); GF_ASSERT(value); GF_ASSERT(value->data); - if (is_key_glusterd_hooks_friendly(key)) { - exists = 1; + if (dict_data->key_check == 1) { + if (is_key_glusterd_hooks_friendly(key)) { + exists = 1; - } else { - exists = glusterd_check_option_exists(key, NULL); + } else { + exists = glusterd_check_option_exists(key, NULL); + } } - - if (1 == exists) { - gf_msg_debug(xl->name, 0, - "Storing in volinfo:key= %s, " + if (exists == 1 || dict_data->key_check == 0) { + gf_msg_debug(this->name, 0, + "Storing in buffer for volinfo:key= %s, " "val=%s", key, value->data); - } else { - gf_msg_debug(xl->name, 0, "Discarding:key= %s, val=%s", key, + gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key, value->data); return 0; } - ret = gf_store_save_value(shandle->fd, key, (char *)value->data); - if (ret) { - gf_msg(xl->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store" - " handle for path: %s", - shandle->path); + /* + * The option_len considers the length of the key value + * pair and along with that '=' and '\n', but as value->len + * already considers a NULL at the end of the data, adding + * just 1. 
+ */ + option_len = strlen(key) + value->len + 1; + + if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) { + ret = gf_store_save_items(shandle->fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + return -1; + } + dict_data->buffer_len = 0; + dict_data->buffer[0] = '\0'; + } + ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1, + "%s=%s\n", key, value->data); + if (ret < 0 || ret > option_len + 1) { + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL); return -1; } + + dict_data->buffer_len += ret; + return 0; } @@ -1013,7 +1000,7 @@ glusterd_store_create_snap_dir(glusterd_snap_t *snap) return ret; } -int32_t +static int32_t glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo) { int32_t ret = -1; @@ -1021,19 +1008,47 @@ glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo) GF_ASSERT(fd > 0); GF_ASSERT(volinfo); GF_ASSERT(volinfo->shandle); + xlator_t *this = NULL; + glusterd_volinfo_data_store_t *dict_data = NULL; + + this = THIS; + GF_ASSERT(this); shandle = volinfo->shandle; + + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + ret = glusterd_volume_exclude_options_write(fd, volinfo); - if (ret) + if (ret) { goto out; + } + + dict_data->shandle = shandle; + dict_data->key_check = 1; shandle->fd = fd; - dict_foreach(volinfo->dict, _storeopts, shandle); + dict_foreach(volinfo->dict, _storeopts, (void *)dict_data); + + dict_data->key_check = 0; + dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data); + + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; + } + } - dict_foreach(volinfo->gsync_slaves, _storeslaves, shandle); shandle->fd = 0; 
out: - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -1274,14 +1289,6 @@ out: return ret; } -static int -_gd_store_rebalance_dict(dict_t *dict, char *key, data_t *value, void *data) -{ - int fd = *(int *)data; - - return gf_store_save_value(fd, key, value->data); -} - int32_t glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) { @@ -1289,6 +1296,12 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) char buf[PATH_MAX]; char uuid[UUID_SIZE + 1]; uint total_len = 0; + glusterd_volinfo_data_store_t *dict_data = NULL; + gf_store_handle_t shandle; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); GF_ASSERT(fd > 0); GF_ASSERT(volinfo); @@ -1328,14 +1341,33 @@ glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) } ret = gf_store_save_items(fd, buf); - if (ret) + if (ret) { goto out; + } if (volinfo->rebal.dict) { - dict_foreach(volinfo->rebal.dict, _gd_store_rebalance_dict, &fd); + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = &shandle; + shandle.fd = fd; + dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + NULL); + goto out; + ; + } + } } out: - gf_msg_debug(THIS->name, 0, "Returning %d", ret); + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); return ret; } @@ -1781,8 +1813,9 @@ glusterd_store_delete_snap(glusterd_snap_t *snap) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; len = snprintf(path, PATH_MAX, 
"%s/%s", delete_path, entry->d_name); if ((len < 0) || (len >= PATH_MAX)) { goto stat_failed; @@ -1812,7 +1845,6 @@ glusterd_store_delete_snap(glusterd_snap_t *snap) ret ? "Failed to remove" : "Removed", entry->d_name); stat_failed: memset(path, 0, sizeof(path)); - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } ret = sys_closedir(dir); @@ -2309,7 +2341,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -2642,6 +2674,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) brick_count++; } + if (gf_store_iter_destroy(&tmpiter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + goto out; + } + ret = gf_store_iter_new(volinfo->shandle, &tmpiter); if (ret) @@ -2816,13 +2855,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(tmpiter)) { + if (gf_store_iter_destroy(&tmpiter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; } - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -2955,7 +2994,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -3231,7 +3270,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret 
= -1; @@ -3336,20 +3375,6 @@ glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len) snprintf(path, len, "%s/options", conf->workdir); } -int -_store_global_opts(dict_t *this, char *key, data_t *value, void *data) -{ - gf_store_handle_t *shandle = data; - - if (gf_store_save_value(shandle->fd, key, (char *)value->data)) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_WRITE_FAIL, - "Unable to write into store handle for key : %s, value %s", key, - (char *)value->data); - } - - return 0; -} - int32_t glusterd_store_options(xlator_t *this, dict_t *opts) { @@ -3358,13 +3383,15 @@ glusterd_store_options(xlator_t *this, dict_t *opts) char path[PATH_MAX] = {0}; int fd = -1; int32_t ret = -1; + glusterd_volinfo_data_store_t *dict_data = NULL; conf = this->private; glusterd_store_set_options_path(conf, path, sizeof(path)); ret = gf_store_handle_new(path, &shandle); - if (ret) + if (ret) { goto out; + } fd = gf_store_mkstemp(shandle); if (fd <= 0) { @@ -3372,15 +3399,30 @@ glusterd_store_options(xlator_t *this, dict_t *opts) goto out; } + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = shandle; shandle->fd = fd; - dict_foreach(opts, _store_global_opts, shandle); - shandle->fd = 0; + dict_foreach(opts, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; + } + } + ret = gf_store_rename_tmppath(shandle); - if (ret) - goto out; out: - if ((ret < 0) && (fd > 0)) + shandle->fd = 0; + GF_FREE(dict_data); + if ((ret < 0) && (fd > 0)) { gf_store_unlink_tmppath(shandle); + } gf_store_handle_destroy(shandle); return ret; } @@ -3426,7 +3468,7 @@ glusterd_store_retrieve_options(xlator_t *this) goto out; ret = 0; out: 
- (void)gf_store_iter_destroy(iter); + (void)gf_store_iter_destroy(&iter); gf_store_handle_destroy(shandle); return ret; } @@ -3478,28 +3520,28 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; if (snap && ((!strcmp(entry->d_name, "geo-replication")) || (!strcmp(entry->d_name, "info")))) - goto next; + continue; len = snprintf(entry_path, PATH_MAX, "%s/%s", path, entry->d_name); - if ((len < 0) || (len >= PATH_MAX)) { - goto next; - } + if ((len < 0) || (len >= PATH_MAX)) + continue; + ret = sys_lstat(entry_path, &st); if (ret == -1) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "Failed to stat entry %s : %s", path, strerror(errno)); - goto next; + continue; } if (!S_ISDIR(st.st_mode)) { gf_msg_debug(this->name, 0, "%s is not a valid volume", entry->d_name); - goto next; + continue; } volinfo = glusterd_store_retrieve_volume(entry->d_name, snap); @@ -3522,8 +3564,6 @@ glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap) glusterd_store_create_nodestate_sh_on_absence(volinfo); glusterd_store_perform_node_state_store(volinfo); } - next: - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } ret = 0; @@ -3878,7 +3918,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap) ret = 0; out: - if (gf_store_iter_destroy(iter)) { + if (gf_store_iter_destroy(&iter)) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, "Failed to destroy store iter"); ret = -1; @@ -4073,9 +4113,9 @@ glusterd_store_retrieve_snaps(xlator_t *this) goto out; } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - - while (entry) { + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; if (strcmp(entry->d_name, GLUSTERD_MISSED_SNAPS_LIST_FILE)) { ret = glusterd_store_retrieve_snap(entry->d_name); if (ret) { @@ -4084,7 
+4124,6 @@ glusterd_store_retrieve_snaps(xlator_t *this) goto out; } } - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); } /* Retrieve missed_snaps_list */ @@ -4399,7 +4438,7 @@ glusterd_store_create_peer_shandle(glusterd_peerinfo_t *peerinfo) static int32_t glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo) { - char buf[128]; + char buf[PATH_MAX]; uint total_len = 0; int32_t ret = 0; int32_t i = 1; @@ -4408,7 +4447,7 @@ glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo) ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n%s=%d\n", GLUSTERD_STORE_KEY_PEER_UUID, uuid_utoa(peerinfo->uuid), GLUSTERD_STORE_KEY_PEER_STATE, peerinfo->state.state); - if (ret < 0 || ret >= sizeof(buf)) { + if (ret < 0 || ret >= sizeof(buf) - total_len) { ret = -1; goto out; } @@ -4419,7 +4458,7 @@ glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo) ret = snprintf(buf + total_len, sizeof(buf) - total_len, GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d=%s\n", i, hostname->hostname); - if (ret < 0 || ret >= sizeof(buf)) { + if (ret < 0 || ret >= sizeof(buf) - total_len) { ret = -1; goto out; } @@ -4531,11 +4570,9 @@ glusterd_store_retrieve_peers(xlator_t *this) goto out; } - for (;;) { - GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); - if (!entry) { - break; - } + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; if (gf_uuid_parse(entry->d_name, tmp_uuid) != 0) { gf_log(this->name, GF_LOG_WARNING, "skipping non-peer file %s", entry->d_name); @@ -4623,7 +4660,7 @@ glusterd_store_retrieve_peers(xlator_t *this) is_ok = _gf_true; next: - (void)gf_store_iter_destroy(iter); + (void)gf_store_iter_destroy(&iter); if (!is_ok) { gf_log(this->name, GF_LOG_WARNING, diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 04070549678..83f4df0783e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -29,7 +29,7 @@ 
typedef enum glusterd_store_ver_ac_ { } glusterd_volinfo_ver_ac_t; #define UUID_SIZE 36 - +#define VOLINFO_BUFFER_SIZE 4093 #define GLUSTERD_STORE_UUID_KEY "UUID" #define GLUSTERD_STORE_KEY_VOL_TYPE "type" @@ -112,6 +112,19 @@ typedef enum glusterd_store_ver_ac_ { #define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha" +/* + * The structure is responsible for handling the parameter for writes into + * the buffer before it is finally written to the file. The writes will be + * of the form of key-value pairs. + */ +struct glusterd_volinfo_data_store_ { + gf_store_handle_t *shandle; /*Contains fd and path of the file */ + int16_t buffer_len; + char key_check; /* flag to check if key is to be validated before write*/ + char buffer[VOLINFO_BUFFER_SIZE]; +}; +typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t; + int32_t glusterd_store_volinfo(glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c index 18990fe365b..ca845903c4f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c @@ -239,8 +239,10 @@ glusterd_svc_check_topology_identical(char *svc_name, int tmpclean = 0; int tmpfd = -1; - if ((!identical) || (!this) || (!this->private)) + if ((!identical) || (!this) || (!this->private)) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); @@ -358,8 +360,10 @@ glusterd_volume_svc_check_topology_identical( int tmpclean = 0; int tmpfd = -1; - if ((!identical) || (!this) || (!this->private)) + if ((!identical) || (!this) || (!this->private)) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); @@ -634,7 +638,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, 
void *v_frame) conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - GF_ATOMIC_DEC(conf->blockers); + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } STACK_DESTROY(frame->root); out: @@ -722,7 +728,9 @@ out: if (volinfo) glusterd_volinfo_unref(volinfo); - GF_ATOMIC_DEC(conf->blockers); + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } STACK_DESTROY(frame->root); return 0; } @@ -785,12 +793,16 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, frame = create_frame(this, this->ctx->pool); if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto *errlbl; } if (op == GLUSTERD_SVC_ATTACH) { dict = dict_new(); if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); ret = -ENOMEM; goto *errlbl; } @@ -808,6 +820,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, file_len = stbuf.st_size; volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char); if (!volfile_content) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); ret = -ENOMEM; goto *errlbl; } @@ -834,10 +847,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val, &brick_req.dict.dict_len); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - GD_MSG_DICT_SERL_LENGTH_GET_FAIL, - "Failed to serialize dict " - "to request buffer"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto *errlbl; } } @@ -969,7 +980,7 @@ glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags) * TBD: see if there's a better way */ synclock_unlock(&conf->big_lock); - sleep(1); + synctask_sleep(1); synclock_lock(&conf->big_lock); } ret = -1; @@ -1023,7 +1034,7 @@ glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig) * TBD: see if there's a 
better way */ synclock_unlock(&conf->big_lock); - sleep(1); + synctask_sleep(1); synclock_lock(&conf->big_lock); } ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c index 99119d69e45..18b3fb13630 100644 --- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c @@ -162,6 +162,9 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) char *localtime_logging = NULL; char *log_level = NULL; char daemon_log_level[30] = {0}; + char msg[1024] = { + 0, + }; int32_t len = 0; this = THIS; @@ -187,7 +190,7 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) runinit(&runner); - if (this->ctx->cmd_args.valgrind) { + if (this->ctx->cmd_args.vgtool != _gf_none) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", svc->proc.logdir, svc->name); if ((len < 0) || (len >= PATH_MAX)) { @@ -195,9 +198,13 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) goto unlock; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", - NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } @@ -226,8 +233,8 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) if (cmdline) dict_foreach(cmdline, svc_add_args, (void *)&runner); - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, - "Starting %s service", svc->name); + snprintf(msg, sizeof(msg), "Starting %s service", svc->name); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); if (flags == PROC_START_NO_WAIT) { ret = runner_run_nowait(&runner); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c 
b/xlators/mgmt/glusterd/src/glusterd-syncop.c index b7039f83885..b73d37ad08e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -406,8 +406,11 @@ gd_syncop_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); gf_uuid_copy(req.txn_id, txn_id); @@ -507,8 +510,11 @@ gd_syncop_mgmt_v3_unlock(dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, &req.dict.dict_len); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } gf_uuid_copy(req.uuid, my_uuid); gf_uuid_copy(req.txn_id, txn_id); @@ -842,16 +848,21 @@ gd_syncop_mgmt_stage_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args, uuid_t *peerid = NULL; req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_stage_req_t); - if (!req) + if (!req) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } gf_uuid_copy(req->uuid, my_uuid); req->op = op; ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val, &req->buf.buf_len); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); if (ret) @@ -903,6 +914,8 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, if (rsp.output.output_len) { args->dict = dict_new(); if (!args->dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); ret = -1; args->op_errno = ENOMEM; goto out; @@ -910,8 +923,11 @@ _gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len, 
&args->dict); - if (ret < 0) + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_UNSERIALIZE_FAIL, NULL); goto out; + } } args->op_ret = rsp.op_ret; @@ -1152,16 +1168,21 @@ gd_syncop_mgmt_commit_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args, uuid_t *peerid = NULL; req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_commit_req_t); - if (!req) + if (!req) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } gf_uuid_copy(req->uuid, my_uuid); req->op = op; ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val, &req->buf.buf_len); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); goto out; + } GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); if (ret) @@ -1278,8 +1299,10 @@ gd_stage_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, GF_ASSERT(conf); rsp_dict = dict_new(); - if (!rsp_dict) + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } if ((op == GD_OP_CREATE_VOLUME) || (op == GD_OP_ADD_BRICK) || (op == GD_OP_START_VOLUME)) @@ -1408,6 +1431,7 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, rsp_dict = dict_new(); if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -1464,8 +1488,11 @@ commit_done: if (op == GD_OP_STATUS_VOLUME) { ret = dict_get_uint32(req_dict, "cmd", &cmd); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=cmd", NULL); goto out; + } if (origin_glusterd) { if ((cmd & GF_CLI_STATUS_ALL)) { @@ -1691,10 +1718,12 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, rpc_clnt_t *rpc = NULL; dict_t *rsp_dict = NULL; int32_t cmd = GF_OP_CMD_NONE; + glusterd_volinfo_t *volinfo = NULL; this = THIS; rsp_dict = dict_new(); if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, 
NULL); ret = -1; goto out; } @@ -1722,18 +1751,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, cds_list_for_each_entry_safe(pending_node, tmp, &selected, list) { rpc = glusterd_pending_node_get_rpc(pending_node); + /* In the case of rebalance if the rpc object is null, we try to + * create the rpc object. if the rebalance daemon is down, it returns + * -1. otherwise, rpc object will be created and referenced. + */ if (!rpc) { - if (pending_node->type == GD_NODE_REBALANCE) { - ret = 0; - glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx); + if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { + volinfo = pending_node->node; + ret = glusterd_rebalance_rpc_create(volinfo); + if (ret) { + ret = 0; + glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx); + goto out; + } else { + rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag); + } + } else { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, + "Brick Op failed " + "due to rpc failure."); goto out; } - - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, - "Brick Op failed " - "due to rpc failure."); - goto out; } ret = gd_syncop_mgmt_brick_op(rpc, pending_node, op, req_dict, op_ctx, @@ -1759,7 +1798,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, pending_node = NULL; ret = 0; out: - if (pending_node) + if (pending_node && pending_node->node) glusterd_pending_node_put_rpc(pending_node); if (rsp_dict) diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h index ce4a940c7a0..a265f2135c6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.h +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -32,7 +32,7 @@ ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \ cbk, (xdrproc_t)xdrproc); \ if (!ret) \ - synctask_yield(stb->task); \ + synctask_yield(stb->task, NULL); \ else \ gf_asprintf(&stb->errstr, \ "%s failed. 
Check log file" \ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index c947e6c926b..90ef2cf4c9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -79,6 +79,14 @@ #include <sys/sockio.h> #endif +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <libprocstat.h> +#include <libutil.h> +#endif + #define NFS_PROGRAM 100003 #define NFSV3_VERSION 3 @@ -443,6 +451,8 @@ glusterd_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame, if (!iobref) { iobref = iobref_new(); if (!iobref) { + gf_smsg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + NULL); goto out; } @@ -645,6 +655,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) new_volinfo->dict = dict_new(); if (!new_volinfo->dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); GF_FREE(new_volinfo); goto out; @@ -652,6 +663,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) new_volinfo->gsync_slaves = dict_new(); if (!new_volinfo->gsync_slaves) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); dict_unref(new_volinfo->dict); GF_FREE(new_volinfo); goto out; @@ -659,6 +671,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) new_volinfo->gsync_active_slaves = dict_new(); if (!new_volinfo->gsync_active_slaves) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); dict_unref(new_volinfo->dict); dict_unref(new_volinfo->gsync_slaves); GF_FREE(new_volinfo); @@ -675,7 +688,9 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc); glusterd_shdsvc_build(&new_volinfo->shd.svc); + pthread_mutex_init(&new_volinfo->store_volinfo_lock, NULL); pthread_mutex_init(&new_volinfo->reflock, NULL); + *volinfo = glusterd_volinfo_ref(new_volinfo); ret = 0; @@ -956,7 +971,10 @@ glusterd_volinfo_delete(glusterd_volinfo_t 
*volinfo) glusterd_auth_cleanup(volinfo); glusterd_shd_svcproc_cleanup(&volinfo->shd); + pthread_mutex_destroy(&volinfo->store_volinfo_lock); pthread_mutex_destroy(&volinfo->reflock); + LOCK_DESTROY(&volinfo->lock); + GF_FREE(volinfo); ret = 0; out: @@ -1107,7 +1125,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) } brick_dir = &brickpath[strlen(mnt_pt)]; - brick_dir++; + if (brick_dir[0] == '/') + brick_dir++; snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); } @@ -1352,6 +1371,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "Reason : %s ", brickinfo->hostname, brickinfo->path, strerror(errno)); + gf_smsg( + "glusterd", GF_LOG_ERROR, errno, GD_MSG_CREATE_BRICK_DIR_FAILED, + "Brick_hostname=%s, Brick_path=%s, Reason=%s", + brickinfo->hostname, brickinfo->path, strerror(errno), NULL); goto out; } } else { @@ -1364,6 +1387,9 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "lstat failed on %s. " "Reason : %s", brickinfo->path, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on Brick_path=%s, Reason=%s", brickinfo->path, + strerror(errno), NULL); goto out; } @@ -1372,6 +1398,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "The provided path %s " "which is already present, is not a directory", brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Brick_path=%s", brickinfo->path, NULL); ret = -1; goto out; } @@ -1388,6 +1416,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "lstat failed on /. " "Reason : %s", strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on /, Reason=%s", strerror(errno), NULL); goto out; } @@ -1397,6 +1427,9 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "lstat failed on %s. 
" "Reason : %s", parentdir, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on parentdir=%s, Reason=%s", parentdir, strerror(errno), + NULL); goto out; } if (strncmp(volname, GLUSTER_SHARED_STORAGE, @@ -1407,6 +1440,8 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, len = snprintf(msg, sizeof(msg), "Brick isn't allowed to be " "created inside glusterd's working directory."); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATION_FAIL, + NULL); ret = -1; goto out; } @@ -1422,6 +1457,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "command if you want to override this " "behavior.", brickinfo->hostname, brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_MNTPNT, + "Use 'force' at the end of the command if you want to " + "override this behavior, Brick_hostname=%s, Brick_path=%s", + brickinfo->hostname, brickinfo->path, NULL); ret = -1; goto out; } else if (parent_st.st_dev == root_st.st_dev) { @@ -1435,6 +1474,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, "command if you want to override this " "behavior.", brickinfo->hostname, brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_ROOT, + "Use 'force' at the end of the command if you want to " + "override this behavior, Brick_hostname=%s, Brick_path=%s", + brickinfo->hostname, brickinfo->path, NULL); /* If --wignore-partition flag is used, ignore warnings * related to bricks being on root partition when 'force' @@ -1466,6 +1509,10 @@ glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, ".glusterfs directory for brick %s:%s. 
" "Reason : %s ", brickinfo->hostname, brickinfo->path, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_CREATE_GLUSTER_DIR_FAILED, + "Brick_hostname=%s, Brick_path=%s, Reason=%s", + brickinfo->hostname, brickinfo->path, strerror(errno), NULL); goto out; } @@ -1608,8 +1655,10 @@ glusterd_volinfo_find_by_volume_id(uuid_t volume_id, glusterd_volinfo_t *voliter = NULL; glusterd_conf_t *priv = NULL; - if (!volume_id) + if (!volume_id) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); return -1; + } this = THIS; priv = this->private; @@ -1881,8 +1930,11 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo, * connections is too long for unix domain socket connections. */ options = dict_new(); - if (!options) + if (!options) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); goto out; + } ret = rpc_transport_unix_options_build(options, socketpath, 600); if (ret) @@ -2025,8 +2077,8 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, retry: runinit(&runner); - if (this->ctx->cmd_args.valgrind) { - /* Run bricks with valgrind */ + if (this->ctx->cmd_args.vgtool != _gf_none) { + /* Run bricks with valgrind. 
*/ if (volinfo->logdir) { len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log", volinfo->logdir, volinfo->volname, exp_path); @@ -2040,8 +2092,13 @@ retry: goto out; } - runner_add_args(&runner, "valgrind", "--leak-check=full", - "--trace-children=yes", "--track-origins=yes", NULL); + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); } @@ -2154,7 +2211,7 @@ retry: if (is_brick_mx_enabled()) runner_add_arg(&runner, "--brick-mux"); - runner_log(&runner, "", 0, "Starting GlusterFS"); + runner_log(&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); brickinfo->port = port; brickinfo->rdma_port = rdma_port; @@ -2163,7 +2220,10 @@ retry: if (wait) { synclock_unlock(&priv->big_lock); + errno = 0; ret = runner_run(&runner); + if (errno != 0) + ret = errno; synclock_lock(&priv->big_lock); if (ret == EADDRINUSE) { @@ -2745,6 +2805,15 @@ glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path, ret = -1; goto out; } + } else if (priv->op_version < GD_OP_VERSION_7_0) { + ret = get_checksum_for_path(filepath, &cksum, priv->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL, + "unable to get " + "checksum for path: %s", + filepath); + goto out; + } } ret = get_checksum_for_file(fd, &cksum, priv->op_version); @@ -2864,13 +2933,19 @@ glusterd_add_bricks_hname_path_to_dict(dict_t *dict, { ret = snprintf(key, sizeof(key), "%d-hostname", index); ret = dict_set_strn(dict, key, ret, brickinfo->hostname); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = snprintf(key, sizeof(key), "%d-path", index); ret = dict_set_strn(dict, key, ret, brickinfo->path); - if (ret) + if (ret) { + gf_smsg("glusterd", 
GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } index++; } @@ -2998,11 +3073,16 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, goto out; ret = gd_add_vol_snap_details_to_dict(dict, pfx, volinfo); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "vol snap details", NULL); goto out; + } volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); if (!volume_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "volume id=%s", volinfo->volume_id, NULL); ret = -1; goto out; } @@ -3035,6 +3115,8 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, rebalance_id_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id)); if (!rebalance_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "rebalance_id=%s", volinfo->rebal.rebalance_id, NULL); ret = -1; goto out; } @@ -3186,6 +3268,9 @@ out: GF_FREE(rebalance_id_str); GF_FREE(rb_id_str); + if (key[0] != '\0' && ret != 0) + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); gf_msg_debug(this->name, 0, "Returning with %d", ret); return ret; } @@ -3244,29 +3329,44 @@ glusterd_vol_add_quota_conf_to_dict(glusterd_volinfo_t *volinfo, dict_t *load, snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix, gfid_idx); ret = dict_set_dynstr_with_alloc(load, key, uuid_utoa(buf)); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx); ret = dict_set_int8(load, key, type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } ret = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix); ret = dict_set_int32n(load, key, ret, gfid_idx); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); ret = dict_set_uint32(load, key, volinfo->quota_conf_cksum); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } snprintf(key, sizeof(key), "%s.quota-version", key_prefix); ret = dict_set_uint32(load, key, volinfo->quota_conf_version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = 0; out: @@ -3599,8 +3699,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); ret = dict_get_strn(peer_data, key, keylen, &volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { @@ -3617,8 +3720,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); ret = dict_get_int32n(peer_data, key, keylen, &version); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } if (version > volinfo->version) { // Mismatch detected @@ -3626,6 +3732,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, "Version of volume %s differ. 
local version = %d, " "remote version = %d on peer %s", volinfo->volname, volinfo->version, version, hostname); + GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); *status = GLUSTERD_VOL_COMP_UPDATE_REQ; goto out; } else if (version < volinfo->version) { @@ -3637,8 +3744,11 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, // snprintf(key, sizeof(key), "%s.ckusm", key_prefix); ret = dict_get_uint32(peer_data, key, &cksum); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } if (cksum != volinfo->cksum) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_VERS_MISMATCH, @@ -4164,8 +4274,11 @@ glusterd_import_quota_conf(dict_t *peer_data, int vol_idx, keylen = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix); ret = dict_get_int32n(peer_data, key, keylen, &gfid_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = glusterd_quota_conf_write_header(fd); if (ret) @@ -4175,8 +4288,11 @@ glusterd_import_quota_conf(dict_t *peer_data, int vol_idx, keylen = snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix, gfid_idx); ret = dict_get_strn(peer_data, key, keylen, &gfid_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx); ret = dict_get_int8(peer_data, key, &gfid_type); @@ -4237,18 +4353,23 @@ gd_import_friend_volume_rebal_dict(dict_t *dict, int count, GF_ASSERT(dict); GF_ASSERT(volinfo); + xlator_t *this = THIS; + GF_ASSERT(this); snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); ret = snprintf(key, sizeof(key), "%s.rebal-dict-count", key_prefix); ret = dict_get_int32n(dict, key, ret, &dict_count); if (ret) { /* Older peers will not have this dict */ + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, 
NULL); ret = 0; goto out; } volinfo->rebal.dict = dict_new(); if (!volinfo->rebal.dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -4258,7 +4379,7 @@ gd_import_friend_volume_rebal_dict(dict_t *dict, int count, out: if (ret && volinfo->rebal.dict) dict_unref(volinfo->rebal.dict); - gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + gf_msg_debug(this->name, 0, "Returning with %d", ret); return ret; } @@ -4746,7 +4867,7 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo, * brick multiplexing enabled, then stop the brick process */ if (ret || (new_brickinfo->snap_status == -1) || - is_brick_mx_enabled()) { + GF_ATOMIC_GET(old_volinfo->volpeerupdate)) { /*TODO: may need to switch to 'atomic' flavour of * brick_stop, once we make peer rpc program also * synctask enabled*/ @@ -4915,8 +5036,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) ret = snprintf(key, sizeof(key), "volume%d.update", count); ret = dict_get_int32n(peer_data, key, ret, &update); - if (ret || !update) { + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + if (!update) { /* if update is 0 that means the volume is not imported */ + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOLUME_NOT_IMPORTED, NULL); goto out; } @@ -5020,18 +5148,25 @@ glusterd_import_friend_volumes_synctask(void *opaque) goto out; peer_data = dict_new(); - if (!peer_data) + if (!peer_data) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); errno = ENOMEM; goto out; } ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); goto out; + } 
synclock_lock(&conf->big_lock); @@ -5040,22 +5175,22 @@ glusterd_import_friend_volumes_synctask(void *opaque) * restarted (refer glusterd_restart_bricks ()) */ while (conf->restart_bricks) { - synclock_unlock(&conf->big_lock); - sleep(2); - synclock_lock(&conf->big_lock); + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); } conf->restart_bricks = _gf_true; while (i <= count) { ret = glusterd_import_friend_volume(peer_data, i); if (ret) { - conf->restart_bricks = _gf_false; - goto out; + break; } i++; } - glusterd_svcs_manager(NULL); + if (i > count) { + glusterd_svcs_manager(NULL); + } conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); out: if (peer_data) dict_unref(peer_data); @@ -5079,8 +5214,11 @@ glusterd_import_friend_volumes(dict_t *peer_data) GF_ASSERT(peer_data); ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); goto out; + } while (i <= count) { ret = glusterd_import_friend_volume(peer_data, i); @@ -5099,11 +5237,16 @@ glusterd_get_global_server_quorum_ratio(dict_t *opts, double *quorum) { int ret = -1; char *quorum_str = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_strn(opts, GLUSTERD_QUORUM_RATIO_KEY, SLEN(GLUSTERD_QUORUM_RATIO_KEY), &quorum_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_QUORUM_RATIO_KEY, NULL); goto out; + } ret = gf_string2percent(quorum_str, quorum); if (ret) @@ -5118,11 +5261,16 @@ glusterd_get_global_opt_version(dict_t *opts, uint32_t *version) { int ret = -1; char *version_str = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_strn(opts, GLUSTERD_GLOBAL_OPT_VERSION, SLEN(GLUSTERD_GLOBAL_OPT_VERSION), &version_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); goto 
out; + } ret = gf_string2uint(version_str, version); if (ret) @@ -5171,13 +5319,17 @@ glusterd_import_global_opts(dict_t *friend_data) SLEN("global-opt-count"), &count); if (ret) { // old version peer + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "Key=global-opt-count", NULL); ret = 0; goto out; } import_options = dict_new(); - if (!import_options) + if (!import_options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = import_prdict_dict(friend_data, import_options, "key", "val", count, "global"); if (ret) { @@ -5248,8 +5400,11 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) } ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); goto out; + } while (i <= count) { ret = glusterd_compare_friend_volume(peer_data, i, status, hostname); @@ -5517,13 +5672,19 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, else if (!strcmp(server, priv->scrub_svc.name)) ret = dict_set_nstrn(dict, key, keylen, "Scrubber Daemon", SLEN("Scrubber Daemon")); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "brick%d.path", count); ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(uuid_utoa(MY_UUID))); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } #ifdef BUILD_GNFS /* Port is available only for the NFS server. 
@@ -5534,26 +5695,38 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, if (dict_getn(vol_opts, "nfs.port", SLEN("nfs.port"))) { ret = dict_get_int32n(vol_opts, "nfs.port", SLEN("nfs.port"), &port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=nfs.port", NULL); goto out; + } } else port = GF_NFS3_PORT; } #endif keylen = snprintf(key, sizeof(key), "brick%d.port", count); ret = dict_set_int32n(dict, key, keylen, port); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "brick%d.pid", count); ret = dict_set_int32n(dict, key, keylen, pid); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "brick%d.status", count); ret = dict_set_int32n(dict, key, keylen, running); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } out: gf_msg_debug(THIS->name, 0, "Returning %d", ret); @@ -5735,7 +5908,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) call_frame_t *frame = v_frame; glusterd_conf_t *conf = frame->this->private; - GF_ATOMIC_DEC(conf->blockers); + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } STACK_DESTROY(frame->root); return 0; @@ -5837,7 +6012,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, } } out: - GF_ATOMIC_DEC(conf->blockers); + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } STACK_DESTROY(frame->root); return 0; } @@ -5893,12 +6070,15 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, iobref = iobref_new(); if (!iobref) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto *errlbl; } errlbl = &&free_iobref; 
frame = create_frame(this, this->ctx->pool); if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); goto *errlbl; } @@ -5929,7 +6109,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, GF_ATOMIC_INC(conf->blockers); ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); - return ret; free_iobref: iobref_unref(iobref); @@ -5938,7 +6117,7 @@ maybe_free_iobuf: iobuf_unref(iobuf); } err: - return -1; + return ret; } extern size_t @@ -6022,7 +6201,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, * TBD: see if there's a better way */ synclock_unlock(&conf->big_lock); - sleep(1); + synctask_sleep(1); synclock_lock(&conf->big_lock); } @@ -6162,7 +6341,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf, "brick %s is still" " starting, waiting for 2 seconds ", other_brick->path); - sleep(2); + synctask_sleep(2); synclock_lock(&conf->big_lock); retries--; } @@ -6266,7 +6445,6 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo, int glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) { - char fname[128] = ""; char buf[1024] = ""; char cmdline[2048] = ""; xlator_t *this = NULL; @@ -6281,6 +6459,22 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) this = THIS; GF_ASSERT(this); +#ifdef __FreeBSD__ + blen = sizeof(buf); + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = pid; + + if (sys_sysctl(mib, 4, buf, &blen, NULL, blen) != 0) { + gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running", + pid); + return ret; + } +#else + char fname[128] = ""; snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid); if (sys_access(fname, R_OK) != 0) { @@ -6297,6 +6491,7 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) strerror(errno), fname); return ret; } +#endif /* convert cmdline to single string */ for (i = 0, j = 0; 
i < blen; i++) { @@ -6345,6 +6540,43 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) char * search_brick_path_from_proc(pid_t brick_pid, char *brickpath) { + char *brick_path = NULL; +#ifdef __FreeBSD__ + struct filestat *fst; + struct procstat *ps; + struct kinfo_proc *kp; + struct filestat_list *head; + + ps = procstat_open_sysctl(); + if (ps == NULL) + goto out; + + kp = kinfo_getproc(brick_pid); + if (kp == NULL) + goto out; + + head = procstat_getfiles(ps, (void *)kp, 0); + if (head == NULL) + goto out; + + STAILQ_FOREACH(fst, head, next) + { + if (fst->fs_fd < 0) + continue; + + if (!strcmp(fst->fs_path, brickpath)) { + brick_path = gf_strdup(fst->fs_path); + break; + } + } + +out: + if (head != NULL) + procstat_freefiles(ps, head); + if (kp != NULL) + free(kp); + procstat_close(ps); +#else struct dirent *dp = NULL; DIR *dirp = NULL; size_t len = 0; @@ -6355,7 +6587,6 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath) 0, }, }; - char *brick_path = NULL; if (!brickpath) goto out; @@ -6391,7 +6622,9 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath) } } out: - sys_closedir(dirp); + if (dirp) + sys_closedir(dirp); +#endif return brick_path; } @@ -6419,8 +6652,10 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, GF_ASSERT(this); conf = this->private; - if ((!brickinfo) || (!volinfo)) + if ((!brickinfo) || (!volinfo)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } if (gf_uuid_is_null(brickinfo->uuid)) { ret = glusterd_resolve_brick(brickinfo); @@ -6445,7 +6680,8 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, * three different triggers for an attempt to start the brick process * due to the quorum handling code in glusterd_friend_sm. 
*/ - if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered) { + if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered || + GF_ATOMIC_GET(volinfo->volpeerupdate)) { gf_msg_debug(this->name, 0, "brick %s is already in starting " "phase", @@ -6651,9 +6887,7 @@ glusterd_restart_bricks(void *opaque) * glusterd_compare_friend_data ()) */ while (conf->restart_bricks) { - synclock_unlock(&conf->big_lock); - sleep(2); - synclock_lock(&conf->big_lock); + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); } conf->restart_bricks = _gf_true; @@ -6767,9 +7001,12 @@ glusterd_restart_bricks(void *opaque) ret = 0; out: - GF_ATOMIC_DEC(conf->blockers); conf->restart_done = _gf_true; conf->restart_bricks = _gf_false; + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + synccond_broadcast(&conf->cond_restart_bricks); return_block: return ret; @@ -7107,22 +7344,26 @@ glusterd_get_brick_root(char *path, char **mount_point) char *mnt_pt = NULL; struct stat brickstat = {0}; struct stat buf = {0}; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!path) + if (!path) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto err; + } mnt_pt = gf_strdup(path); - if (!mnt_pt) + if (!mnt_pt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto err; + } if (sys_stat(mnt_pt, &brickstat)) goto err; while ((ptr = strrchr(mnt_pt, '/')) && ptr != mnt_pt) { *ptr = '\0'; if (sys_stat(mnt_pt, &buf)) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, - "error in " - "stat: %s", - strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Error in stat=%s", strerror(errno), NULL); goto err; } @@ -7134,10 +7375,8 @@ glusterd_get_brick_root(char *path, char **mount_point) if (ptr == mnt_pt) { if (sys_stat("/", &buf)) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, - "error in " - "stat: %s", - 
strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Error in stat=%s", strerror(errno), NULL); goto err; } if (brickstat.st_dev == buf.st_dev) @@ -7202,11 +7441,16 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count) }; struct fs_info *fs = NULL; static dict_t *cached_fs = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = snprintf(key, sizeof(key), "brick%d.device", count); ret = dict_get_strn(dict, key, ret, &device); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } if (cached_fs) { if (dict_get_str(cached_fs, device, &cur_word) == 0) { @@ -7218,8 +7462,11 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count) ret = snprintf(key, sizeof(key), "brick%d.fs_name", count); ret = dict_get_strn(dict, key, ret, &fs_name); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } runinit(&runner); runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); @@ -7228,11 +7475,9 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count) if (strcmp(fs_name, fs->fs_type_name) == 0) { if (!fs->fs_tool_name) { /* dynamic inodes */ - gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_INODE_SIZE_GET_FAIL, - "the " - "brick on %s (%s) uses dynamic inode " - "sizes", - device, fs_name); + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_INODE_SIZE_GET_FAIL, + "The brick on device uses dynamic inode sizes", + "Device=%s (%s)", device, fs_name, NULL); cur_word = "N/A"; goto cached; } @@ -7246,19 +7491,17 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count) runner_add_arg(&runner, fs->fs_tool_arg); runner_add_arg(&runner, device); } else { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, - "could not find %s to get" - "inode size for %s (%s): %s package missing?", - fs->fs_tool_name, device, fs_name, fs->fs_tool_pkg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, + 
"Could not find tool to get inode size for device", "Tool=%s", + fs->fs_tool_name, "Device=%s (%s)", device, fs_name, + "Missing package=%s ?", fs->fs_tool_pkg, NULL); goto out; } ret = runner_start(&runner); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, - "failed to execute " - "\"%s\"", - fs->fs_tool_name); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, + "Failed to execute \"%s\"", fs->fs_tool_name, NULL); /* * Runner_start might return an error after the child has * been forked, e.g. if the program isn't there. In that @@ -7286,21 +7529,22 @@ glusterd_add_inode_size_to_dict(dict_t *dict, int count) ret = runner_end(&runner); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, - "%s exited with non-zero exit status", fs->fs_tool_name); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, + "Tool exited with non-zero exit status", "Tool=%s", + fs->fs_tool_name, NULL); goto out; } if (!cur_word) { ret = -1; - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, - "Unable to retrieve inode size using %s", fs->fs_tool_name); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, + "Using Tool=%s", fs->fs_tool_name, NULL); goto out; } if (dict_set_dynstr_with_alloc(cached_fs, device, cur_word)) { /* not fatal if not entered into the cache */ - gf_msg_debug(THIS->name, 0, "failed to cache fs inode size for %s", + gf_msg_debug(this->name, 0, "failed to cache fs inode size for %s", device); } @@ -7311,8 +7555,7 @@ cached: out: if (ret) - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, - "failed to get inode size"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, NULL); return ret; } @@ -7368,16 +7611,23 @@ glusterd_add_brick_mount_details(glusterd_brickinfo_t *brickinfo, dict_t *dict, struct mntent save_entry = {0}; char *mnt_pt = NULL; struct mntent *entry = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); snprintf(base_key, 
sizeof(base_key), "brick%d", count); ret = glusterd_get_brick_root(brickinfo->path, &mnt_pt); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICKPATH_ROOT_GET_FAIL, + NULL); goto out; + } entry = glusterd_get_mnt_entry_info(mnt_pt, buff, sizeof(buff), &save_entry); if (!entry) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GET_MNT_ENTRY_INFO_FAIL, + NULL); ret = -1; goto out; } @@ -7386,15 +7636,21 @@ glusterd_add_brick_mount_details(glusterd_brickinfo_t *brickinfo, dict_t *dict, snprintf(key, sizeof(key), "%s.device", base_key); ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_fsname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } /* fs type */ snprintf(key, sizeof(key), "%s.fs_name", base_key); ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } /* mount options */ snprintf(key, sizeof(key), "%s.mnt_options", base_key); @@ -7488,43 +7744,61 @@ glusterd_add_brick_detail_to_dict(glusterd_volinfo_t *volinfo, block_size = brickstat.f_bsize; snprintf(key, sizeof(key), "%s.block_size", base_key); ret = dict_set_uint64(dict, key, block_size); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } /* free space in brick */ memfree = brickstat.f_bfree * brickstat.f_bsize; snprintf(key, sizeof(key), "%s.free", base_key); ret = dict_set_uint64(dict, key, memfree); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } /* total space of brick */ memtotal = brickstat.f_blocks * brickstat.f_bsize; snprintf(key, sizeof(key), "%s.total", base_key); ret = dict_set_uint64(dict, key, memtotal); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } /* inodes: total and free counts only for ext2/3/4 and xfs */ inodes_total = brickstat.f_files; if (inodes_total) { snprintf(key, sizeof(key), "%s.total_inodes", base_key); ret = dict_set_uint64(dict, key, inodes_total); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } inodes_free = brickstat.f_ffree; if (inodes_free) { snprintf(key, sizeof(key), "%s.free_inodes", base_key); ret = dict_set_uint64(dict, key, inodes_free); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } ret = glusterd_add_brick_mount_details(brickinfo, dict, count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_ADD_BRICK_MNT_INFO_FAIL, + NULL); goto out; + } ret = glusterd_add_inode_size_to_dict(dict, count); out: @@ -7630,8 +7904,11 @@ glusterd_add_brick_to_dict(glusterd_volinfo_t *volinfo, ret = dict_set_int32n(dict, key, keylen, brick_online); out: - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); gf_msg_debug(this->name, 0, "Returning %d", ret); + } return ret; } @@ -7712,8 +7989,10 @@ glusterd_brick_stop(glusterd_volinfo_t *volinfo, conf = this->private; GF_ASSERT(conf); - if ((!brickinfo) || (!volinfo)) + if ((!brickinfo) || (!volinfo)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } if (gf_uuid_is_null(brickinfo->uuid)) { ret = glusterd_resolve_brick(brickinfo); @@ -7856,8 +8135,10 @@ glusterd_rb_check_bricks(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, rb = &volinfo->rep_brick; - if (!rb->src_brick || !rb->dst_brick) + if (!rb->src_brick || !rb->dst_brick) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); return -1; + } if (strcmp(rb->src_brick->hostname, src->hostname) || 
strcmp(rb->src_brick->path, src->path)) { @@ -8003,6 +8284,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, char msg[2048] = ""; gf_boolean_t in_use = _gf_false; int flags = 0; + xlator_t *this = THIS; + GF_ASSERT(this); /* Check for xattr support in backend fs */ ret = sys_lsetxattr(path, "trusted.glusterfs.test", "working", 8, 0); @@ -8013,6 +8296,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, " extended attributes failed, reason:" " %s.", host, path, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_BRICK_FAIL, + "Host=%s, Path=%s", host, path, NULL); goto out; } else { @@ -8022,6 +8307,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, "Removing test extended" " attribute failed, reason: %s", strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_REMOVE_XATTR_FAIL, + NULL); goto out; } } @@ -8044,6 +8331,8 @@ glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, "Failed to set extended " "attributes %s, reason: %s", GF_XATTR_VOL_ID_KEY, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attriutes=%s", GF_XATTR_VOL_ID_KEY, NULL); goto out; } @@ -8063,7 +8352,7 @@ glusterd_sm_tr_log_transition_add_to_dict(dict_t *dict, int ret = -1; char key[64] = ""; int keylen; - char timestr[64] = ""; + char timestr[GF_TIMESTR_SIZE] = ""; char *str = NULL; GF_ASSERT(dict); @@ -8095,6 +8384,9 @@ glusterd_sm_tr_log_transition_add_to_dict(dict_t *dict, goto out; out: + if (key[0] != '\0' && ret != 0) + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); gf_msg_debug("glusterd", 0, "returning %d", ret); return ret; } @@ -8203,7 +8495,8 @@ glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state, transitions[next].old_state = old_state; transitions[next].new_state = new_state; transitions[next].event = event; - time(&transitions[next].time); + 
transitions[next].time = gf_time(); + log->current = next; if (log->count < log->size) log->count++; @@ -8319,8 +8612,10 @@ glusterd_get_local_brickpaths(glusterd_volinfo_t *volinfo, char **pathlist) int i = 0; glusterd_brickinfo_t *brickinfo = NULL; - if ((!volinfo) || (!pathlist)) + if ((!volinfo) || (!pathlist)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } path_tokens = GF_CALLOC(sizeof(char *), volinfo->brick_count, gf_gld_mt_charptr); @@ -8774,6 +9069,8 @@ glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr) snprintf(msg, sizeof(msg), "for nfs statedump, options should" " be after the key nfs"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -8841,6 +9138,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt, dup_options = gf_strdup(options); if (!dup_options) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "options=%s", options, NULL); goto out; } option = strtok_r(dup_options, " ", &tmpptr); @@ -8848,6 +9147,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt, snprintf(msg, sizeof(msg), "for gluster client statedump, options " "should be after the key 'client'"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -8855,6 +9156,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt, target_ip = strtok_r(NULL, " ", &tmpptr); if (target_ip == NULL) { snprintf(msg, sizeof(msg), "ip address not specified"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg, + NULL); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -8863,6 +9166,8 @@ glusterd_client_statedump(char *volname, char *options, int option_cnt, pid = strtok_r(NULL, " ", &tmpptr); if (pid == NULL) { snprintf(msg, sizeof(msg), "pid not 
specified"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg, + NULL); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -8903,6 +9208,8 @@ glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr) snprintf(msg, sizeof(msg), "for quotad statedump, options " "should be after the key 'quotad'"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); *op_errstr = gf_strdup(msg); ret = -1; goto out; @@ -9765,6 +10072,8 @@ glusterd_append_gsync_status(dict_t *dst, dict_t *src) ret = dict_get_strn(src, "gsync-status", SLEN("gsync-status"), &stop_msg); if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=gsync-status", NULL); ret = 0; goto out; } @@ -10019,8 +10328,11 @@ glusterd_sync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) int ret = 0; GF_ASSERT(rsp_dict); + xlator_t *this = THIS; + GF_ASSERT(this); if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; } @@ -10068,6 +10380,8 @@ glusterd_profile_volume_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &brick_count); if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); ret = 0; // no bricks in the rsp goto out; } @@ -10347,6 +10661,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) glusterd_volinfo_t *volinfo = NULL; GF_ASSERT(rsp_dict); + xlator_t *this = THIS; + GF_ASSERT(this); if (aggr) { ctx_dict = aggr; @@ -10356,8 +10672,11 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) } ret = dict_get_int32n(ctx_dict, "cmd", SLEN("cmd"), &cmd); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=cmd", + NULL); goto out; + } if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd(ctx_dict)) { ret = dict_get_int32n(rsp_dict, "vol_count", SLEN("vol_count"), @@ -10365,18 +10684,27 @@ 
glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) if (ret == 0) { ret = dict_set_int32n(ctx_dict, "vol_count", SLEN("vol_count"), vol_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=vol_count", NULL); goto out; + } for (i = 0; i < vol_count; i++) { keylen = snprintf(key, sizeof(key), "vol%d", i); ret = dict_get_strn(rsp_dict, key, keylen, &volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); goto out; + } ret = dict_set_strn(ctx_dict, key, keylen, volname); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); goto out; + } } } else { /* Ignore the error as still the aggregation applies in @@ -10390,6 +10718,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &rsp_node_count); if (ret) { + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, "Key=count", + NULL); ret = 0; // no bricks in the rsp goto out; } @@ -10397,8 +10727,8 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) ret = dict_get_int32n(rsp_dict, "other-count", SLEN("other-count"), &rsp_other_count); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get other count from rsp_dict"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=other-count", NULL); goto out; } @@ -10408,18 +10738,27 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) if (!dict_getn(ctx_dict, "brick-index-max", SLEN("brick-index-max"))) { ret = dict_get_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"), &brick_index_max); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick-index-max", NULL); goto out; + } ret = dict_set_int32n(ctx_dict, "brick-index-max", SLEN("brick-index-max"), 
brick_index_max); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=brick-index-max", NULL); goto out; + } } else { ret = dict_get_int32n(ctx_dict, "brick-index-max", SLEN("brick-index-max"), &brick_index_max); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick-index-max", NULL); goto out; + } } rsp_ctx.count = node_count; @@ -10432,45 +10771,45 @@ glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) ret = dict_set_int32n(ctx_dict, "count", SLEN("count"), node_count + rsp_node_count); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Failed to update node count"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; } ret = dict_set_int32n(ctx_dict, "other-count", SLEN("other-count"), (other_count + rsp_other_count)); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Failed to update other-count"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=other-count", NULL); goto out; } ret = dict_get_strn(ctx_dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, - "Failed to get volname"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=volname", NULL); goto out; } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, - "Failed to get volinfo for volume: %s", volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); goto out; } ret = dict_set_int32n(ctx_dict, "hot_brick_count", SLEN("hot_brick_count"), hot_brick_count); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, - "Failed to update hot_brick_count"); + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=hot_brick_count", NULL); goto out; } ret = 
dict_set_int32n(ctx_dict, "type", SLEN("type"), type); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, - "Failed to update type"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=type", NULL); goto out; } @@ -12912,7 +13251,7 @@ glusterd_get_value_for_vme_entry(struct volopt_map_entry *vme, char **def_val) ret = xlator_option_info_list(&vol_opt_handle, key, &local_def_val, &descr); if (ret) { /*Swallow Error if option not found*/ - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED, + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GET_KEY_FAILED, "Failed to get option for %s " "key", key); @@ -13069,7 +13408,9 @@ glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx, gf_asprintf(&def_val, "%d", priv->op_version); need_free = _gf_true; } else { - def_val = valid_all_vol_opts[i].dflt_val; + gf_asprintf(&def_val, "%s (DEFAULT)", + valid_all_vol_opts[i].dflt_val); + need_free = _gf_true; } } @@ -13155,6 +13496,7 @@ glusterd_get_default_val_for_volopt(dict_t *ctx, gf_boolean_t all_opts, int count = 0; xlator_t *this = NULL; char *def_val = NULL; + char *def_val_str = NULL; char dict_key[50] = ""; int keylen; gf_boolean_t key_found = _gf_false; @@ -13215,7 +13557,13 @@ glusterd_get_default_val_for_volopt(dict_t *ctx, gf_boolean_t all_opts, goto out; } sprintf(dict_key, "value%d", count); - ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val); + if (get_value_vme) { // the value was never changed - DEFAULT is used + gf_asprintf(&def_val_str, "%s (DEFAULT)", def_val); + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val_str); + GF_FREE(def_val_str); + def_val_str = NULL; + } else + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Failed to " @@ -13740,23 +14088,24 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, char vpath[PATH_MAX] = ""; char *volfileserver = NULL; - priv = 
THIS->private; - GF_VALIDATE_OR_GOTO(THIS->name, priv, out); + xlator_t *this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); dirty[2] = hton32(1); ret = sys_lsetxattr(brickinfo->path, GF_AFR_DIRTY, dirty, sizeof(dirty), 0); if (ret == -1) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL, - "Failed to set extended" - " attribute %s : %s.", - GF_AFR_DIRTY, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s", GF_AFR_DIRTY, "Reason=%s", strerror(errno), + NULL); goto out; } if (mkdtemp(tmpmount) == NULL) { - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, - "failed to create a temporary mount directory."); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + NULL); ret = -1; goto out; } @@ -13767,7 +14116,7 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, switch (op) { case GD_OP_REPLACE_BRICK: - if (dict_get_strn(THIS->options, "transport.socket.bind-address", + if (dict_get_strn(this->options, "transport.socket.bind-address", SLEN("transport.socket.bind-address"), &volfileserver) != 0) volfileserver = "localhost"; @@ -13810,7 +14159,7 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, ret = runner_run(&runner); if (ret) { - gf_log(THIS->name, GF_LOG_ERROR, + gf_log(this->name, GF_LOG_ERROR, "mount command" " failed."); goto lock; @@ -13820,19 +14169,18 @@ glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, (op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK : GF_AFR_ADD_BRICK, brickinfo->brick_id, sizeof(brickinfo->brick_id), 0); if (ret == -1) - gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL, - "Failed to set extended" - " attribute %s : %s", - (op == GD_OP_REPLACE_BRICK) ? 
GF_AFR_REPLACE_BRICK - : GF_AFR_ADD_BRICK, - strerror(errno)); - gf_umount_lazy(THIS->name, tmpmount, 1); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Reason=%s", + (op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK + : GF_AFR_ADD_BRICK, + strerror(errno), NULL); + gf_umount_lazy(this->name, tmpmount, 1); lock: synclock_lock(&priv->big_lock); out: if (pid) GF_FREE(pid); - gf_msg_debug("glusterd", 0, "Returning with ret"); + gf_msg_debug(this->name, 0, "Returning with ret"); return ret; } @@ -14041,6 +14389,8 @@ glusterd_brick_op_prerequisites(dict_t *dict, char **op, glusterd_op_t *gd_op, "brick: %s does not exist in " "volume: %s", *src_brick, *volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + "Brick=%s, Volume=%s", *src_brick, *volname, NULL); *op_errstr = gf_strdup(msg); goto out; } @@ -14265,8 +14615,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", SLEN("Self-heal Daemon")); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "brick%d.path", count); uuid_str = gf_strdup(uuid_utoa(MY_UUID)); @@ -14275,8 +14628,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, goto out; } ret = dict_set_dynstrn(dict, key, keylen, uuid_str); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } uuid_str = NULL; /* shd doesn't have a port. 
but the cli needs a port key with @@ -14285,8 +14641,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, keylen = snprintf(key, sizeof(key), "brick%d.port", count); ret = dict_set_int32n(dict, key, keylen, 0); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } pidfile = volinfo->shd.svc.proc.pidfile; @@ -14297,8 +14656,11 @@ glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, pid = -1; keylen = snprintf(key, sizeof(key), "brick%d.pid", count); ret = dict_set_int32n(dict, key, keylen, pid); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); goto out; + } keylen = snprintf(key, sizeof(key), "brick%d.status", count); ret = dict_set_int32n(dict, key, keylen, brick_online); @@ -14313,3 +14675,372 @@ out: return ret; } + +static gf_ai_compare_t +glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) +{ + int ret = -1; + struct addrinfo *tmp1 = NULL; + struct addrinfo *tmp2 = NULL; + char firstip[NI_MAXHOST] = {0.}; + char nextip[NI_MAXHOST] = { + 0, + }; + + for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { + ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, + NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { + ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, + NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + if (!strcmp(firstip, nextip)) { + return GF_AI_COMPARE_MATCH; + } + } + } + return GF_AI_COMPARE_NO_MATCH; +} + +/* Check for non optimal brick order for Replicate/Disperse : + * Checks if bricks belonging to a replicate or disperse + * volume are present on the same server + */ +int32_t +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + char **volname, char **brick_list, + int32_t *brick_count, int32_t sub_count) 
+{ + int ret = -1; + int i = 0; + int j = 0; + int k = 0; + xlator_t *this = NULL; + addrinfo_list_t *ai_list = NULL; + addrinfo_list_t *ai_list_tmp1 = NULL; + addrinfo_list_t *ai_list_tmp2 = NULL; + char *brick = NULL; + char *brick_list_dup = NULL; + char *brick_list_ptr = NULL; + char *tmpptr = NULL; + struct addrinfo *ai_info = NULL; + char brick_addr[128] = { + 0, + }; + int addrlen = 0; + + const char failed_string[2048] = + "Failed to perform brick order " + "check. Use 'force' at the end of the command" + " if you want to override this behavior. "; + const char found_string[2048] = + "Multiple bricks of a %s " + "volume are present on the same server. This " + "setup is not optimal. Bricks should be on " + "different nodes to have best fault tolerant " + "configuration. Use 'force' at the end of the " + "command if you want to override this " + "behavior. "; + + this = THIS; + + GF_ASSERT(this); + + ai_list = MALLOC(sizeof(addrinfo_list_t)); + ai_list->info = NULL; + CDS_INIT_LIST_HEAD(&ai_list->list); + + if (!(*volname)) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &(*volname)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + } + + if (!(*brick_list)) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &(*brick_list)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve bricks list"); + goto out; + } + } + + if (!(*brick_count)) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &(*brick_count)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve brick count"); + goto out; + } + } + + brick_list_dup = brick_list_ptr = gf_strdup(*brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < *brick_count) { + ++i; + brick = strtok_r(brick_list_dup, " \n", &tmpptr); + brick_list_dup = tmpptr; + if (brick == NULL) + goto 
check_failed; + tmpptr = strrchr(brick, ':'); + if (tmpptr == NULL) + goto check_failed; + addrlen = strlen(brick) - strlen(tmpptr); + strncpy(brick_addr, brick, addrlen); + brick_addr[addrlen] = '\0'; + ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, + "unable to resolve host name for addr %s", brick_addr); + goto out; + } + ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); + if (ai_list_tmp1 == NULL) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "failed to allocate " + "memory"); + freeaddrinfo(ai_info); + goto out; + } + ai_list_tmp1->info = ai_info; + cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); + ai_list_tmp1 = NULL; + } + + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + + if (*brick_count < sub_count) { + sub_count = *brick_count; + } + + /* Check for bad brick order */ + while (i < *brick_count) { + ++i; + ai_info = ai_list_tmp1->info; + ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, + list); + if (0 == i % sub_count) { + j = 0; + continue; + } + ai_list_tmp2 = ai_list_tmp1; + k = j; + while (k < sub_count - 1) { + ++k; + ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); + if (GF_AI_COMPARE_ERROR == ret) + goto check_failed; + if (GF_AI_COMPARE_MATCH == ret) + goto found_bad_brick_order; + ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, + addrinfo_list_t, list); + } + ++j; + } + gf_msg_debug(this->name, 0, "Brick order okay"); + ret = 0; + goto out; + +check_failed: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, + "Failed bad brick order check"); + snprintf(err_str, sizeof(failed_string), failed_string); + ret = -1; + goto out; + +found_bad_brick_order: + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, + "Bad brick order found"); + if (type == GF_CLUSTER_TYPE_DISPERSE) { + snprintf(err_str, 
sizeof(found_string), found_string, "disperse"); + } else { + snprintf(err_str, sizeof(found_string), found_string, "replicate"); + } + + ret = -1; +out: + ai_list_tmp2 = NULL; + GF_FREE(brick_list_ptr); + cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) + { + if (ai_list_tmp1->info) + freeaddrinfo(ai_list_tmp1->info); + free(ai_list_tmp2); + ai_list_tmp2 = ai_list_tmp1; + } + free(ai_list); + free(ai_list_tmp2); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +static gf_boolean_t +search_peer_in_auth_list(char *peer_hostname, char *auth_allow_list) +{ + if (strstr(auth_allow_list, peer_hostname)) { + return _gf_true; + } + + return _gf_false; +} + +/* glusterd_add_peers_to_auth_list() adds peers into auth.allow list + * if auth.allow list is not empty. This is called for add-brick and + * replica brick operations to avoid failing the temporary mount. New + * volfiles will be generated and clients are notified reg new volfiles. + */ +void +glusterd_add_peers_to_auth_list(char *volname) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int32_t len = 0; + char *auth_allow_list = NULL; + char *new_auth_allow_list = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_VALIDATE_OR_GOTO(this->name, volname, out); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = dict_get_str_sizen(volinfo->dict, "auth.allow", &auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "auth allow list is not set"); + goto out; + } + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + len += strlen(peerinfo->hostname); + } + len += strlen(auth_allow_list) + 1; + + new_auth_allow_list = GF_CALLOC(1, len, 
gf_common_mt_char); + + new_auth_allow_list = strncat(new_auth_allow_list, auth_allow_list, + strlen(auth_allow_list)); + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + ret = search_peer_in_auth_list(peerinfo->hostname, new_auth_allow_list); + if (!ret) { + gf_log(this->name, GF_LOG_DEBUG, + "peer %s not found in auth.allow list", peerinfo->hostname); + new_auth_allow_list = strcat(new_auth_allow_list, ","); + new_auth_allow_list = strncat(new_auth_allow_list, + peerinfo->hostname, + strlen(peerinfo->hostname)); + } + } + if (strcmp(new_auth_allow_list, auth_allow_list) != 0) { + /* In case, new_auth_allow_list is not same as auth_allow_list, + * we need to update the volinfo->dict with new_auth_allow_list. + * we delete the auth_allow_list and replace it with + * new_auth_allow_list. for reverting the changes in post commit, we + * keep the copy of auth_allow_list as old_auth_allow_list in + * volinfo->dict. + */ + dict_del_sizen(volinfo->dict, "auth.allow"); + ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"), + new_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to set new auth.allow list"); + goto out; + } + ret = dict_set_strn(volinfo->dict, "old.auth.allow", + SLEN("old.auth.allow"), auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to set old auth.allow list"); + goto out; + } + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + } +out: + GF_FREE(new_auth_allow_list); + return; +} + +int +glusterd_replace_old_auth_allow_list(char *volname) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *old_auth_allow_list = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volname, out); + + ret = 
glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = dict_get_str_sizen(volinfo->dict, "old.auth.allow", + &old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "old auth allow list is not set, no need to replace the list"); + ret = 0; + goto out; + } + + dict_del_sizen(volinfo->dict, "auth.allow"); + ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"), + old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to replace auth.allow list"); + goto out; + } + + dict_del_sizen(volinfo->dict, "old.auth.allow"); + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL, + "failed to store volinfo"); + goto out; + } +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index b58f158fd14..bf6ac295e26 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -857,4 +857,9 @@ search_brick_path_from_proc(pid_t brick_pid, char *brickpath); int32_t glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count); +int32_t +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + char **volname, char **bricks, int32_t *brick_count, + int32_t sub_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index a085a0ff8d6..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c 
@@ -90,6 +90,8 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg) xlator_t *xl = NULL; char *volname = NULL; int ret = 0; + xlator_t *this = THIS; + GF_ASSERT(this); ret = gf_vasprintf(&volname, format, arg); if (ret < 0) { @@ -99,14 +101,21 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg) } xl = GF_CALLOC(1, sizeof(*xl), gf_common_mt_xlator_t); - if (!xl) + if (!xl) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto error; + } ret = xlator_set_type_virtual(xl, type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_SET_OPT_FAIL, + NULL); goto error; + } xl->options = dict_new(); - if (!xl->options) + if (!xl->options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto error; + } xl->name = volname; CDS_INIT_LIST_HEAD(&xl->volume_options); @@ -115,8 +124,8 @@ xlator_instantiate_va(const char *type, const char *format, va_list arg) return xl; error: - gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XLATOR_CREATE_FAIL, - "creating xlator of type %s failed", type); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_CREATE_FAIL, "Type=%s", + type, NULL); GF_FREE(volname); if (xl) xlator_destroy(xl); @@ -865,6 +874,8 @@ _xl_link_children(xlator_t *parent, xlator_t *children, size_t child_count) xlator_t *trav = NULL; size_t seek = 0; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); if (child_count == 0) goto out; @@ -873,9 +884,12 @@ _xl_link_children(xlator_t *parent, xlator_t *children, size_t child_count) ; for (; child_count--; trav = trav->prev) { ret = volgen_xlator_link(parent, trav); - gf_msg_debug(THIS->name, 0, "%s:%s", parent->name, trav->name); - if (ret) + gf_msg_debug(this->name, 0, "%s:%s", parent->name, trav->name); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_LINK_FAIL, + NULL); goto out; + } } ret = 0; out: @@ -933,8 +947,10 @@ volgen_apply_filters(char *orig_volfile) entry = 
sys_readdir(filterdir, scratch); - if (!entry || errno != 0) + if (!entry || errno != 0) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_READ_ERROR, NULL); break; + } if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; @@ -1472,14 +1488,22 @@ volgen_graph_set_xl_options(volgen_graph_t *graph, dict_t *dict) }; /* for posix* -> *posix* */ char *loglevel = NULL; xlator_t *trav = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_str_sizen(dict, "xlator", &xlator); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=xlator", NULL); goto out; + } ret = dict_get_str_sizen(dict, "loglevel", &loglevel); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=loglevel", NULL); goto out; + } snprintf(xlator_match, 1024, "*%s", xlator); @@ -1579,14 +1603,22 @@ gfproxy_server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *password = NULL; /*int rclusters = 0;*/ + xlator_t *this = THIS; + GF_ASSERT(this); /* We are a trusted client */ ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret != 0) + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } ret = dict_set_int32_sizen(set_dict, "gfproxy-server", 1); - if (ret != 0) + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=gfproxy-server", NULL); goto out; + } /* Build the client section of the graph first */ build_client_graph(graph, volinfo, set_dict); @@ -1647,11 +1679,13 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - if (!graph || !volinfo || !set_dict || !brickinfo) + this = THIS; + + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } - this = THIS; - 
GF_VALIDATE_OR_GOTO("glusterd", this, out); priv = this->private; GF_VALIDATE_OR_GOTO("glusterd", priv, out); @@ -1716,9 +1750,13 @@ brick_graph_add_selinux(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo) + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/selinux", volinfo->volname); if (!xl) @@ -1785,8 +1823,10 @@ brick_graph_add_bitrot_stub(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *value = NULL; xlator_t *this = THIS; - if (!graph || !volinfo || !set_dict || !brickinfo) + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/bitrot-stub", volinfo->volname); if (!xl) @@ -1821,9 +1861,13 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, }; int ret = -1; int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict || !brickinfo) + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/changelog", volinfo->volname); if (!xl) @@ -1836,6 +1880,7 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, len = snprintf(changelog_basepath, sizeof(changelog_basepath), "%s/%s", brickinfo->path, ".glusterfs/changelogs"); if ((len < 0) || (len >= sizeof(changelog_basepath))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -1865,14 +1910,31 @@ brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || 
!set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } - xl = volgen_graph_add(graph, "features/access-control", volinfo->volname); - if (!xl) + ret = dict_get_str_boolean(set_dict, "features.acl", 1); + if (!ret) { + /* Skip creating this volume if option is disabled */ + /* By default, this is 'true' */ goto out; + } else if (ret < 0) { + /* lets not treat this as error, as this option is not critical, + and implemented for debug help */ + gf_log(THIS->name, GF_LOG_INFO, + "failed to get 'features.acl' flag from dict"); + } + xl = volgen_graph_add(graph, "features/access-control", volinfo->volname); + if (!xl) { + ret = -1; + goto out; + } ret = 0; out: return ret; @@ -1884,9 +1946,13 @@ brick_graph_add_locks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/locks", volinfo->volname); if (!xl) @@ -1903,9 +1969,13 @@ brick_graph_add_iot(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "performance/io-threads", volinfo->volname); if (!xl) @@ -1921,9 +1991,12 @@ brick_graph_add_barrier(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; - if (!graph || !volinfo) + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/barrier", volinfo->volname); if (!xl) @@ -1940,9 +2013,13 @@ 
brick_graph_add_sdfs(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo) + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } if (!dict_get_str_boolean(set_dict, "features.sdfs", 0)) { /* update only if option is enabled */ @@ -1970,9 +2047,13 @@ brick_graph_add_namespace(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } ret = dict_get_str_boolean(set_dict, "features.tag-namespaces", 0); if (ret == -1) @@ -2025,9 +2106,13 @@ brick_graph_add_index(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char index_basepath[PATH_MAX] = {0}; int ret = -1; int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !brickinfo || !set_dict) + if (!graph || !volinfo || !brickinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/index", volinfo->volname); if (!xl) @@ -2036,6 +2121,7 @@ brick_graph_add_index(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, len = snprintf(index_basepath, sizeof(index_basepath), "%s/%s", brickinfo->path, ".glusterfs/indices"); if ((len < 0) || (len >= sizeof(index_basepath))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); goto out; } @@ -2082,9 +2168,13 @@ brick_graph_add_marker(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char buf[32] = { 0, }; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = 
volgen_graph_add(graph, "features/marker", volinfo->volname); if (!xl) @@ -2115,9 +2205,13 @@ brick_graph_add_quota(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, int ret = -1; xlator_t *xl = NULL; char *value = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/quota", volinfo->volname); if (!xl) @@ -2143,9 +2237,13 @@ brick_graph_add_ro(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { int ret = -1; xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } if (dict_get_str_boolean(set_dict, "features.read-only", 0) && (dict_get_str_boolean(set_dict, "features.worm", 0) || @@ -2175,9 +2273,13 @@ brick_graph_add_worm(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { int ret = -1; xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } if (dict_get_str_boolean(set_dict, "features.read-only", 0) && (dict_get_str_boolean(set_dict, "features.worm", 0) || @@ -2204,9 +2306,13 @@ brick_graph_add_cdc(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { int ret = -1; xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } /* Check for compress volume option, and add it to the graph on * server side */ @@ -2236,8 +2342,10 @@ brick_graph_add_io_stats(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, xlator_t *this = THIS; 
glusterd_conf_t *priv = this->private; - if (!graph || !set_dict || !brickinfo) + if (!graph || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add_as(graph, "debug/io-stats", brickinfo->path); if (!xl) @@ -2265,9 +2373,13 @@ brick_graph_add_upcall(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/upcall", volinfo->volname); if (!xl) { @@ -2287,9 +2399,13 @@ brick_graph_add_leases(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, { xlator_t *xl = NULL; int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } xl = volgen_graph_add(graph, "features/leases", volinfo->volname); if (!xl) { @@ -2319,9 +2435,13 @@ brick_graph_add_server(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *volname = NULL; char *address_family_data = NULL; int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict || !brickinfo) + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } get_vol_transport_type(volinfo, transt); @@ -2430,13 +2550,20 @@ brick_graph_add_pump(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *password = NULL; char *ptranst = NULL; char *address_family_data = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!graph || !volinfo || !set_dict) + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } ret = 
dict_get_int32(volinfo->dict, "enable-pump", &pump); - if (ret == -ENOENT) + if (ret == -ENOENT) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=enable-pump", NULL); ret = pump = 0; + } if (ret) return -1; @@ -3234,11 +3361,20 @@ volgen_link_bricks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, j); j++; } + if (!xl) { ret = -1; goto out; } + if (strncmp(xl_type, "performance/readdir-ahead", + SLEN("performance/readdir-ahead")) == 0) { + ret = xlator_set_fixed_option(xl, "performance.readdir-ahead", + "on"); + if (ret) + goto out; + } + ret = volgen_xlator_link(xl, trav); if (ret) goto out; @@ -3466,13 +3602,13 @@ volgen_graph_build_readdir_ahead(volgen_graph_t *graph, int32_t clusters = 0; if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD || - !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR) || - !glusterd_volinfo_get_boolean(volinfo, VKEY_READDIR_AHEAD)) + !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR)) goto out; clusters = volgen_link_bricks_from_list_tail( graph, volinfo, "performance/readdir-ahead", "%s-readdir-ahead-%d", child_count, 1); + out: return clusters; } @@ -3674,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3715,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, 
clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; @@ -4419,11 +4594,15 @@ nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, volinfo = param; - if (!volinfo || (volinfo->volname[0] == '\0')) + if (!volinfo || (volinfo->volname[0] == '\0')) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); return 0; + } - if (!vme || !(vme->option)) + if (!vme || !(vme->option)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); return 0; + } xl = first_of(graph); @@ -4554,8 +4733,11 @@ prepare_shd_volume_options(glusterd_volinfo_t *volinfo, dict_t *mod_dict, goto out; ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } dict_copy(volinfo->dict, set_dict); if (mod_dict) @@ -4742,6 +4924,7 @@ build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, set_dict = dict_new(); if (!set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -ENOMEM; goto out; } @@ -4880,25 +5063,40 @@ build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict) ret = dict_set_sizen_str_sizen(set_dict, "performance.stat-prefetch", "off"); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=performance.stat-prefetch", NULL); goto out; + } ret = dict_set_sizen_str_sizen(set_dict, "performance.client-io-threads", "off"); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=performance.client-io-threads", NULL); goto out; + } ret = dict_set_str_sizen(set_dict, "client-transport-type", nfs_xprt); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + 
"Key=client-transport-type", NULL); goto out; + } ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } ret = dict_set_sizen_str_sizen(set_dict, "nfs-volume-file", "yes"); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=nfs-volume-file", NULL); goto out; + } if (mod_dict && (data = dict_get_sizen(mod_dict, "volume-name"))) { volname = data->data; @@ -5140,8 +5338,11 @@ build_quotad_graph(volgen_graph_t *graph, dict_t *mod_dict) continue; ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } dict_copy(voliter->dict, set_dict); if (mod_dict) @@ -5337,14 +5538,21 @@ glusterd_generate_client_per_brick_volfile(glusterd_volinfo_t *volinfo) int ret = -1; char *ssl_str = NULL; gf_boolean_t ssl_bool = _gf_false; + xlator_t *this = THIS; + GF_ASSERT(this); dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto free_dict; + } if (dict_get_str_sizen(volinfo->dict, "client.ssl", &ssl_str) == 0) { if (gf_string2boolean(ssl_str, &ssl_bool) == 0) { @@ -5426,17 +5634,25 @@ generate_dummy_client_volfiles(glusterd_volinfo_t *volinfo) enumerate_transport_reqs(volinfo->transport_type, types); dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } for (i = 0; types[i]; i++) { ret = dict_set_str(dict, "client-transport-type", types[i]); - if (ret) + if (ret) { + gf_smsg(this->name, 
GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=client-transport-type", NULL); goto out; + } type = transport_str_to_type(types[i]); ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_OTHER); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } ret = glusterd_get_dummy_client_filepath(filepath, volinfo, type); if (ret) { @@ -5497,17 +5713,25 @@ generate_client_volfiles(glusterd_volinfo_t *volinfo, enumerate_transport_reqs(volinfo->transport_type, types); dict = dict_new(); - if (!dict) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } for (i = 0; types[i]; i++) { ret = dict_set_str(dict, "client-transport-type", types[i]); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=client-transport-type", NULL); goto out; + } type = transport_str_to_type(types[i]); ret = dict_set_uint32(dict, "trusted-client", client_type); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } if (client_type == GF_CLIENT_TRUSTED) { ret = glusterd_get_trusted_client_filepath(filepath, volinfo, type); @@ -5657,10 +5881,15 @@ prepare_bitrot_scrub_volume_options(glusterd_volinfo_t *volinfo, dict_t *mod_dict, dict_t *set_dict) { int ret = 0; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); goto out; + } dict_copy(volinfo->dict, set_dict); if (mod_dict) @@ -5728,6 +5957,7 @@ build_bitd_volume_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, set_dict = dict_new(); if (!set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } @@ -6134,8 +6364,11 @@ validate_shdopts(glusterd_volinfo_t 
*volinfo, dict_t *val_dict, goto out; } ret = dict_set_int32_sizen(val_dict, "graph-check", 1); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=graph-check", NULL); goto out; + } ret = build_shd_graph(volinfo, &graph, val_dict); if (!ret) ret = graph_reconf_validateopt(&graph.graph, op_errstr); @@ -6189,6 +6422,8 @@ validate_nfsopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, "wrong transport " "type %s", tt); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INCOMPATIBLE_VALUE, + "Type=%s", tt, NULL); *op_errstr = gf_strdup(err_str); ret = -1; goto out; @@ -6257,6 +6492,7 @@ validate_brickopts(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, graph.errstr = op_errstr; full_dict = dict_new(); if (!full_dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 1c5a2508d9c..814ab14fb27 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -37,240 +37,6 @@ #define glusterd_op_start_volume_args_get(dict, volname, flags) \ glusterd_op_stop_volume_args_get(dict, volname, flags) -gf_ai_compare_t -glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) -{ - int ret = -1; - struct addrinfo *tmp1 = NULL; - struct addrinfo *tmp2 = NULL; - char firstip[NI_MAXHOST] = {0.}; - char nextip[NI_MAXHOST] = { - 0, - }; - - for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { - ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, - NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { - ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, - NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - if (!strcmp(firstip, nextip)) { - return GF_AI_COMPARE_MATCH; - } - 
} - } - return GF_AI_COMPARE_NO_MATCH; -} - -/* Check for non optimal brick order for replicate : - * Checks if bricks belonging to a replicate volume - * are present on the same server - */ -int32_t -glusterd_check_brick_order(dict_t *dict, char *err_str) -{ - int ret = -1; - int i = 0; - int j = 0; - int k = 0; - xlator_t *this = NULL; - addrinfo_list_t *ai_list = NULL; - addrinfo_list_t *ai_list_tmp1 = NULL; - addrinfo_list_t *ai_list_tmp2 = NULL; - char *brick = NULL; - char *brick_list = NULL; - char *brick_list_dup = NULL; - char *brick_list_ptr = NULL; - char *tmpptr = NULL; - char *volname = NULL; - int32_t brick_count = 0; - int32_t type = GF_CLUSTER_TYPE_NONE; - int32_t sub_count = 0; - struct addrinfo *ai_info = NULL; - char brick_addr[128] = { - 0, - }; - int addrlen = 0; - - const char failed_string[2048] = - "Failed to perform brick order " - "check. Use 'force' at the end of the command" - " if you want to override this behavior. "; - const char found_string[2048] = - "Multiple bricks of a %s " - "volume are present on the same server. This " - "setup is not optimal. Bricks should be on " - "different nodes to have best fault tolerant " - "configuration. Use 'force' at the end of the " - "command if you want to override this " - "behavior. 
"; - - this = THIS; - - GF_ASSERT(this); - - ai_list = MALLOC(sizeof(addrinfo_list_t)); - ai_list->info = NULL; - CDS_INIT_LIST_HEAD(&ai_list->list); - - ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volume name"); - goto out; - } - - ret = dict_get_int32n(dict, "type", SLEN("type"), &type); - if (ret) { - snprintf(err_str, 512, "Unable to get type of volume %s", volname); - gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", - err_str); - goto out; - } - - ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve bricks list"); - goto out; - } - - ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could not " - "retrieve brick count"); - goto out; - } - - if (type != GF_CLUSTER_TYPE_DISPERSE) { - ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), - &sub_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could" - " not retrieve replica count"); - goto out; - } - gf_msg_debug(this->name, 0, - "Replicate cluster type " - "found. Checking brick order."); - } else { - ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), - &sub_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Bricks check : Could" - " not retrieve disperse count"); - goto out; - } - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, - "Disperse cluster type" - " found. 
Checking brick order."); - } - - brick_list_dup = brick_list_ptr = gf_strdup(brick_list); - /* Resolve hostnames and get addrinfo */ - while (i < brick_count) { - ++i; - brick = strtok_r(brick_list_dup, " \n", &tmpptr); - brick_list_dup = tmpptr; - if (brick == NULL) - goto check_failed; - tmpptr = strrchr(brick, ':'); - if (tmpptr == NULL) - goto check_failed; - addrlen = strlen(brick) - strlen(tmpptr); - strncpy(brick_addr, brick, addrlen); - brick_addr[addrlen] = '\0'; - ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); - if (ret != 0) { - ret = 0; - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, - "unable to resolve host name for addr %s", brick_addr); - goto out; - } - ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); - if (ai_list_tmp1 == NULL) { - ret = 0; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, - "failed to allocate " - "memory"); - freeaddrinfo(ai_info); - goto out; - } - ai_list_tmp1->info = ai_info; - cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); - ai_list_tmp1 = NULL; - } - - i = 0; - ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); - - /* Check for bad brick order */ - while (i < brick_count) { - ++i; - ai_info = ai_list_tmp1->info; - ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, - list); - if (0 == i % sub_count) { - j = 0; - continue; - } - ai_list_tmp2 = ai_list_tmp1; - k = j; - while (k < sub_count - 1) { - ++k; - ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); - if (GF_AI_COMPARE_ERROR == ret) - goto check_failed; - if (GF_AI_COMPARE_MATCH == ret) - goto found_bad_brick_order; - ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, - addrinfo_list_t, list); - } - ++j; - } - gf_msg_debug(this->name, 0, "Brick order okay"); - ret = 0; - goto out; - -check_failed: - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, - "Failed bad brick order check"); - snprintf(err_str, sizeof(failed_string), failed_string); - ret = 
-1; - goto out; - -found_bad_brick_order: - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, - "Bad brick order found"); - if (type == GF_CLUSTER_TYPE_DISPERSE) { - snprintf(err_str, sizeof(found_string), found_string, "disperse"); - } else { - snprintf(err_str, sizeof(found_string), found_string, "replicate"); - } - - ret = -1; -out: - ai_list_tmp2 = NULL; - GF_FREE(brick_list_ptr); - cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) - { - if (ai_list_tmp1->info) - freeaddrinfo(ai_list_tmp1->info); - free(ai_list_tmp2); - ai_list_tmp2 = ai_list_tmp1; - } - free(ai_list_tmp2); - return ret; -} - int __glusterd_handle_create_volume(rpcsvc_request_t *req) { @@ -809,10 +575,14 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, int ret = 0; char *key = NULL; char *value = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), (int32_t *)&heal_op); if (ret || (heal_op == GF_SHD_OP_INVALID)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=heal-op", NULL); ret = -1; goto out; } @@ -850,21 +620,33 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, } else { key = "cluster.granular-entry-heal"; ret = dict_set_int8(dict, "is-special-key", 1); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=is-special-key", NULL); goto out; + } } ret = dict_set_strn(dict, "key1", SLEN("key1"), key); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=key1", NULL); goto out; + } ret = dict_set_strn(dict, "value1", SLEN("value1"), value); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=value1", NULL); goto out; + } ret = dict_set_int32n(dict, "count", SLEN("count"), 1); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; + 
} ret = glusterd_op_begin_synctask(req, GD_OP_SET_VOLUME, dict); @@ -888,18 +670,19 @@ __glusterd_handle_cli_heal_volume(rpcsvc_request_t *req) 0, }; + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { // failed to decode msg; req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } - this = THIS; - GF_ASSERT(this); - if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new(); @@ -960,8 +743,11 @@ __glusterd_handle_cli_heal_volume(rpcsvc_request_t *req) goto out; ret = dict_set_int32n(dict, "count", SLEN("count"), volinfo->brick_count); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); goto out; + } ret = glusterd_op_begin_synctask(req, GD_OP_HEAL_VOLUME, dict); @@ -1013,6 +799,7 @@ __glusterd_handle_cli_statedump_volume(rpcsvc_request_t *req) ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); if (ret < 0) { req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); goto out; } if (cli_req.dict.dict_len) { @@ -1107,6 +894,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, int32_t local_brick_count = 0; int32_t i = 0; int32_t type = 0; + int32_t replica_count = 0; + int32_t disperse_count = 0; char *brick = NULL; char *tmpptr = NULL; xlator_t *this = NULL; @@ -1201,16 +990,44 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, } if (!is_force) { - if ((type == GF_CLUSTER_TYPE_REPLICATE) || - (type == GF_CLUSTER_TYPE_DISPERSE)) { - ret = glusterd_check_brick_order(dict, msg); + if (type == GF_CLUSTER_TYPE_REPLICATE) { + ret = dict_get_int32n(dict, "replica-count", + SLEN("replica-count"), &replica_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve replica count"); + goto out; + } 
+ gf_msg_debug(this->name, 0, + "Replicate cluster type " + "found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, + replica_count); + } else if (type == GF_CLUSTER_TYPE_DISPERSE) { + ret = dict_get_int32n(dict, "disperse-count", + SLEN("disperse-count"), &disperse_count); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, - "Not " - "creating volume because of " - "bad brick order"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve disperse count"); goto out; } + gf_msg_debug(this->name, 0, + "Disperse cluster type" + " found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, + disperse_count); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, + "Not creating the volume because of " + "bad brick order. %s", + msg); + *op_errstr = gf_strdup(msg); + goto out; } } } @@ -1325,20 +1142,32 @@ glusterd_op_stop_volume_args_get(dict_t *dict, char **volname, int *flags) this = THIS; GF_ASSERT(this); - if (!dict || !volname || !flags) + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (!volname) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } + + if (!flags) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } ret = dict_get_strn(dict, "volname", SLEN("volname"), volname); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volume name"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; } ret = dict_get_int32n(dict, "flags", SLEN("flags"), flags); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get flags"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=flags", NULL); goto out; 
} out: @@ -1351,27 +1180,29 @@ glusterd_op_statedump_volume_args_get(dict_t *dict, char **volname, { int ret = -1; - if (!dict || !volname || !options || !option_cnt) + if (!dict || !volname || !options || !option_cnt) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); goto out; + } ret = dict_get_strn(dict, "volname", SLEN("volname"), volname); if (ret) { - gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get volname"); + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; } ret = dict_get_strn(dict, "options", SLEN("options"), options); if (ret) { - gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get options"); + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=options", NULL); goto out; } ret = dict_get_int32n(dict, "option_cnt", SLEN("option_cnt"), option_cnt); if (ret) { - gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Unable to get option count"); + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=option_cnt", NULL); goto out; } @@ -1598,8 +1429,13 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr) GF_ASSERT(this); ret = glusterd_op_stop_volume_args_get(dict, &volname, &flags); - if (ret) + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get details of volume %s", + volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_ARGS_GET_FAILED, + "Volume name=%s", volname, NULL); goto out; + } ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { @@ -1881,14 +1717,15 @@ glusterd_op_stage_heal_volume(dict_t *dict, char **op_errstr) if (!glusterd_is_volume_started(volinfo)) { ret = -1; snprintf(msg, sizeof(msg), "Volume %s is not started.", volname); - gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_STARTED, "%s", - msg); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_STARTED, + "Volume=%s", volname, NULL); *op_errstr = gf_strdup(msg); goto out; } 
opt_dict = volinfo->dict; if (!opt_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, NULL); ret = 0; goto out; } @@ -1944,6 +1781,8 @@ glusterd_op_stage_statedump_volume(dict_t *dict, char **op_errstr) ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); goto out; } @@ -2107,8 +1946,6 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr) goto out; } - pthread_mutex_init(&volinfo->store_volinfo_lock, NULL); - ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { @@ -3060,33 +2897,35 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) char *mntpt = NULL; char **xl_opts = NULL; glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get volume name"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); goto out; } gf_msg_debug("glusterd", 0, "Performing clearlocks on volume %s", volname); ret = dict_get_strn(dict, "path", SLEN("path"), &path); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get path"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=path", + NULL); goto out; } ret = dict_get_strn(dict, "kind", SLEN("kind"), &kind); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get kind"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=kind", + NULL); goto out; } ret = dict_get_strn(dict, "type", SLEN("type"), &type); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, - "Failed to get type"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=type", + NULL); 
goto out; } @@ -3094,10 +2933,9 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) if (ret) ret = 0; - gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD, - "Received clear-locks request for " - "volume %s with kind %s type %s and options %s", - volname, kind, type, opts); + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD, + "Volume=%s, Kind=%s, Type=%s, Options=%s", volname, kind, type, + opts, NULL); if (opts) ret = gf_asprintf(&cmd_str, GF_XATTR_CLRLK_CMD ".t%s.k%s.%s", type, @@ -3110,22 +2948,25 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { snprintf(msg, sizeof(msg), "Volume %s doesn't exist.", volname); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "Volume=%s", + volname, NULL); goto out; } xl_opts = GF_CALLOC(volinfo->brick_count + 1, sizeof(char *), gf_gld_mt_charptr); - if (!xl_opts) + if (!xl_opts) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto out; + } ret = glusterd_clearlocks_get_local_client_ports(volinfo, xl_opts); if (ret) { snprintf(msg, sizeof(msg), "Couldn't get port numbers of " "local bricks"); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRK_PORT_NUM_GET_FAIL, "%s", - msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_PORT_NUM_GET_FAIL, + NULL); goto out; } @@ -3134,8 +2975,8 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) snprintf(msg, sizeof(msg), "Creating mount directory " "for clear-locks failed."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, - GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, "%s", msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, NULL); goto out; } @@ -3144,16 +2985,15 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) snprintf(msg, sizeof(msg), "Failed to mount 
clear-locks " "maintenance client."); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, - "%s", msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, + NULL); goto out; } ret = glusterd_clearlocks_send_cmd(volinfo, cmd_str, path, result, msg, sizeof(msg), mntpt); if (ret) { - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CLRCLK_SND_CMD_FAIL, "%s", - msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRCLK_SND_CMD_FAIL, NULL); goto umount; } @@ -3164,16 +3004,16 @@ glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) snprintf(msg, sizeof(msg), "Failed to set clear-locks " "result"); - gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", msg); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=lk-summary", NULL); } umount: glusterd_clearlocks_unmount(volinfo, mntpt); if (glusterd_clearlocks_rmdir_mount(volinfo, mntpt)) - gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL, - "Couldn't unmount " - "clear-locks mount point"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL, + NULL); out: if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 04ec9a6e571..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -8,9 +8,10 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ +#include <glusterfs/syscall.h> #include "glusterd-volgen.h" #include "glusterd-utils.h" -#include "sys/stat.h" + static int validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, char **op_errstr) @@ -790,7 +791,7 @@ static int is_directory(const char *path) { struct stat statbuf; - if (stat(path, &statbuf) != 0) + if (sys_stat(path, &statbuf) != 0) return 0; return S_ISDIR(statbuf.st_mode); } @@ -1265,10 +1266,21 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "priority", .op_version = 1, .flags = VOLOPT_FLAG_CLIENT_OPT}, - {.key = "performance.cache-size", + {.key = "performance.io-cache-size", .voltype = "performance/io-cache", - .op_version = 1, + .option = "cache-size", + .op_version = GD_OP_VERSION_8_0, .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.cache-size", + .voltype = "performance/io-cache", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "Deprecated option. Use performance.io-cache-size " + "to adjust the cache size of the io-cache translator, " + "and use performance.quick-read-cache-size to adjust " + "the cache size of the quick-read translator.", + }, /* IO-threads xlator options */ {.key = "performance.io-thread-count", @@ -1308,16 +1320,29 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "performance/io-cache", .option = "pass-through", .op_version = GD_OP_VERSION_4_1_0}, + {.key = "performance.quick-read-cache-size", + .voltype = "performance/quick-read", + .option = "cache-size", + .op_version = GD_OP_VERSION_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = "performance.cache-size", .voltype = "performance/quick-read", .type = NO_DOC, .op_version = 1, .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.quick-read-cache-timeout", + .voltype = "performance/quick-read", + .option = "cache-timeout", + .op_version = GD_OP_VERSION_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = "performance.qr-cache-timeout", .voltype = 
"performance/quick-read", .option = "cache-timeout", .op_version = 1, - .flags = VOLOPT_FLAG_CLIENT_OPT}, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = + "Deprecated option. Use performance.quick-read-cache-timeout " + "instead."}, {.key = "performance.quick-read-cache-invalidation", .voltype = "performance/quick-read", .option = "quick-read-cache-invalidation", @@ -1788,7 +1813,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { {.key = "performance.readdir-ahead", .voltype = "performance/readdir-ahead", .option = "!perf", - .value = "on", + .value = "off", .op_version = 3, .description = "enable/disable readdir-ahead translator in the volume.", .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, @@ -2461,7 +2486,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "storage/posix", .op_version = GD_OP_VERSION_4_1_0, }, - {.key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3}, {.key = "config.memory-accounting", .voltype = "mgmt/glusterd", .option = "!config", @@ -3103,4 +3127,20 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "features/cloudsync", .op_version = GD_OP_VERSION_7_0, .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.acl", + .voltype = "features/access-control", + .value = "enable", + .option = "!features", + .op_version = GD_OP_VERSION_8_0, + .description = "(WARNING: for debug purpose only) enable/disable " + "access-control xlator in volume", + .type = NO_DOC, + }, + + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index cd2c5da628b..7a86c2997b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -67,7 +67,7 @@ extern struct rpcsvc_program gd_svc_cli_trusted_progs; extern struct rpc_clnt_program gd_brick_prog; extern struct 
rpcsvc_program glusterd_mgmt_hndsk_prog; -extern char snap_mount_dir[PATH_MAX]; +extern char snap_mount_dir[VALID_GLUSTERD_PATHMAX]; rpcsvc_cbk_program_t glusterd_cbk_prog = { .progname = "Gluster Callback", @@ -202,8 +202,10 @@ glusterd_options_init(xlator_t *this) priv = this->private; priv->opts = dict_new(); - if (!priv->opts) + if (!priv->opts) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } ret = glusterd_store_retrieve_options(this); if (ret == 0) { @@ -247,6 +249,7 @@ glusterd_client_statedump_submit_req(char *volname, char *target_ip, char *pid) GF_ASSERT(conf); if (target_ip == NULL || pid == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = -1; goto out; } @@ -447,14 +450,19 @@ glusterd_rpcsvc_options_build(dict_t *options) { int ret = 0; uint32_t backlog = 0; + xlator_t *this = THIS; + GF_ASSERT(this); ret = dict_get_uint32(options, "transport.listen-backlog", &backlog); if (ret) { backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG; ret = dict_set_uint32(options, "transport.listen-backlog", backlog); - if (ret) + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=transport.listen-backlog", NULL); goto out; + } } gf_msg_debug("glusterd", 0, "listen-backlog value: %d", backlog); @@ -574,6 +582,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf) len = snprintf(georepdir, PATH_MAX, "%s/" GEOREP, conf->workdir); if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -585,9 +594,11 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf) } ret = dict_get_str(THIS->options, GEOREP "-log-group", &greplg_s); - if (ret) + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=log-group", NULL); ret = 0; - else { + } else { gr = getgrnam(greplg_s); if (!gr) { gf_msg("glusterd", GF_LOG_CRITICAL, 0, 
GD_MSG_LOGGROUP_INVALID, @@ -628,6 +639,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf) } len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves", conf->logdir); if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -654,6 +666,7 @@ glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf) len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves/mbr", conf->logdir); if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); ret = -1; goto out; } @@ -1045,6 +1058,8 @@ _install_mount_spec(dict_t *opts, char *key, data_t *value, void *data) int rv = 0; gf_mount_spec_t *mspec = NULL; char *user = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); label = strtail(key, "mountbroker."); @@ -1059,8 +1074,10 @@ _install_mount_spec(dict_t *opts, char *key, data_t *value, void *data) return 0; mspec = GF_CALLOC(1, sizeof(*mspec), gf_gld_mt_mount_spec); - if (!mspec) + if (!mspec) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); goto err; + } mspec->label = label; if (georep) { @@ -1116,8 +1133,10 @@ glusterd_init_uds_listener(xlator_t *this) GF_ASSERT(this); options = dict_new(); - if (!options) + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); goto out; + } sock_data = dict_get(this->options, "glusterd-sockfile"); (void)snprintf(sockfile, sizeof(sockfile), "%s", @@ -1404,7 +1423,7 @@ init(xlator_t *this) char *mountbroker_root = NULL; int i = 0; int total_transport = 0; - gf_boolean_t valgrind = _gf_false; + gf_valgrind_tool vgtool; char *valgrind_str = NULL; char *transport_type = NULL; char var_run_dir[PATH_MAX] = { @@ -1417,6 +1436,14 @@ init(xlator_t *this) int32_t len = 0; int op_version = 0; +#if defined(RUN_WITH_MEMCHECK) + vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + vgtool = _gf_drd; +#else + vgtool = _gf_none; +#endif + #ifndef 
GF_DARWIN_HOST_OS { struct rlimit lim; @@ -1424,9 +1451,8 @@ init(xlator_t *this) lim.rlim_max = 65536; if (setrlimit(RLIMIT_NOFILE, &lim) == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SETXATTR_FAIL, - "Failed to set 'ulimit -n " - " 65536'"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Failed to set 'ulimit -n 65536'", NULL); } else { gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FILE_DESC_LIMIT_SET, "Maximum allowed open file descriptors " @@ -1872,6 +1898,9 @@ init(xlator_t *this) (void)strncpy(conf->logdir, logdir, sizeof(conf->logdir)); synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE); + synccond_init(&conf->cond_restart_bricks); + synccond_init(&conf->cond_restart_shd); + synccond_init(&conf->cond_blockers); pthread_mutex_init(&conf->xprt_lock, NULL); INIT_LIST_HEAD(&conf->xprt_list); pthread_mutex_init(&conf->import_volumes, NULL); @@ -1904,18 +1933,24 @@ init(xlator_t *this) } /* Set option to run bricks on valgrind if enabled in glusterd.vol */ - this->ctx->cmd_args.valgrind = valgrind; + this->ctx->cmd_args.vgtool = vgtool; ret = dict_get_str(this->options, "run-with-valgrind", &valgrind_str); if (ret < 0) { gf_msg_debug(this->name, 0, "cannot get run-with-valgrind value"); } if (valgrind_str) { - if (gf_string2boolean(valgrind_str, &valgrind)) { + gf_boolean_t vg = _gf_false; + + if (!strcmp(valgrind_str, "memcheck")) + this->ctx->cmd_args.vgtool = _gf_memcheck; + else if (!strcmp(valgrind_str, "drd")) + this->ctx->cmd_args.vgtool = _gf_drd; + else if (!gf_string2boolean(valgrind_str, &vg)) + this->ctx->cmd_args.vgtool = (vg ? 
_gf_memcheck : _gf_none); + else gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, - "run-with-valgrind value not a boolean string"); - } else { - this->ctx->cmd_args.valgrind = valgrind; - } + "run-with-valgrind is neither boolean" + " nor one of 'memcheck' or 'drd'"); } /* Store ping-timeout in conf */ diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index d7e4da8425a..cc4f98ecf47 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -209,6 +209,9 @@ typedef struct { gf_boolean_t restart_done; dict_t *opts; synclock_t big_lock; + synccond_t cond_restart_bricks; + synccond_t cond_restart_shd; + synccond_t cond_blockers; rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ uint32_t base_port; uint32_t max_port; @@ -510,6 +513,10 @@ struct glusterd_volinfo_ { * volfile generation code, we are * temporarily appending either "-hot" * or "-cold" */ + gf_atomic_t volpeerupdate; + /* Flag to check about volume has received updates + from peer + */ }; typedef enum gd_snap_status_ { @@ -1190,6 +1197,8 @@ glusterd_op_set_ganesha(dict_t *dict, char **errstr); int ganesha_manage_export(dict_t *dict, char *value, gf_boolean_t update_cache_invalidation, char **op_errstr); +int +gd_ganesha_send_dbus(char *volname, char *value); gf_boolean_t glusterd_is_ganesha_cluster(); gf_boolean_t @@ -1211,6 +1220,9 @@ int glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr); int +glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict); + +int glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict); int @@ -1354,4 +1366,10 @@ glusterd_options_init(xlator_t *this); int32_t glusterd_recreate_volfiles(glusterd_conf_t *conf); +void +glusterd_add_peers_to_auth_list(char *volname); + +int +glusterd_replace_old_auth_allow_list(char *volname); + #endif diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 
3df1135a2ed..0e22fe411ee 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -8,6 +8,8 @@ cases as published by the Free Software Foundation. */ +#include <config.h> + #include <sys/wait.h> #include "fuse-bridge.h" #include <glusterfs/glusterfs.h> @@ -177,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts) 0, }; - clock_gettime(CLOCK_REALTIME, &ts); + timespec_now_realtime(&ts); fts->sec = ts.tv_sec; fts->nsec = ts.tv_nsec; @@ -225,14 +227,30 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, if (res == -1) { const char *errdesc = NULL; gf_loglevel_t loglevel = GF_LOG_ERROR; + gf_boolean_t errno_degraded = _gf_false; + gf_boolean_t errno_promoted = _gf_false; + +#define ACCOUNT_ERRNO(eno) \ + do { \ + if (errno_degraded) { \ + pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \ + { \ + if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \ + errno_promoted = _gf_true; \ + } \ + pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \ + } \ + } while (0) /* If caller masked the errno, then it * does not indicate an error at the application * level, so we degrade the log severity to DEBUG. 
*/ if (errnomask && errno < ERRNOMASK_MAX && - GET_ERRNO_MASK(errnomask, errno)) + GET_ERRNO_MASK(errnomask, errno)) { loglevel = GF_LOG_DEBUG; + errno_degraded = _gf_true; + } switch (errno) { /* The listed errnos are FUSE status indicators, @@ -242,33 +260,43 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, */ case ENOENT: errdesc = "ENOENT"; + ACCOUNT_ERRNO(ENOENT); break; case ENOTDIR: errdesc = "ENOTDIR"; + ACCOUNT_ERRNO(ENOTDIR); break; case ENODEV: errdesc = "ENODEV"; + ACCOUNT_ERRNO(ENODEV); break; case EPERM: errdesc = "EPERM"; + ACCOUNT_ERRNO(EPERM); break; case ENOMEM: errdesc = "ENOMEM"; + ACCOUNT_ERRNO(ENOMEM); break; case ENOTCONN: errdesc = "ENOTCONN"; + ACCOUNT_ERRNO(ENOTCONN); break; case ECONNREFUSED: errdesc = "ECONNREFUSED"; + ACCOUNT_ERRNO(ECONNREFUSED); break; case EOVERFLOW: errdesc = "EOVERFLOW"; + ACCOUNT_ERRNO(EOVERFLOW); break; case EBUSY: errdesc = "EBUSY"; + ACCOUNT_ERRNO(EBUSY); break; case ENOTEMPTY: errdesc = "ENOTEMPTY"; + ACCOUNT_ERRNO(ENOTEMPTY); break; default: errdesc = strerror(errno); @@ -276,7 +304,13 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, gf_log_callingfn("glusterfs-fuse", loglevel, "writing to fuse device failed: %s", errdesc); + if (errno_promoted) + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "writing to fuse device yielded %s %d times", errdesc, + UINT8_MAX + 1); return errno; + +#undef ACCOUNT_ERRNO } fouh = iov_out[0].iov_base; @@ -373,7 +407,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) static int32_t fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) { -#if FUSE_KERNEL_MINOR_VERSION >= 11 +#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)) struct fuse_out_header *fouh = NULL; struct fuse_notify_inval_entry_out *fnieo = NULL; fuse_private_t *priv = NULL; @@ -464,7 +498,7 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) static int32_t fuse_invalidate_inode(xlator_t 
*this, uint64_t fuse_ino) { -#if FUSE_KERNEL_MINOR_VERSION >= 11 +#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)) struct fuse_out_header *fouh = NULL; struct fuse_notify_inval_inode_out *fniio = NULL; fuse_private_t *priv = NULL; @@ -799,18 +833,22 @@ fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this, { intstat_orig = fir->interrupt_state; if (fir->interrupt_state == INTERRUPT_NONE) { - fir->interrupt_state = INTERRUPT_SQUELCHED; if (sync) { - while (fir->interrupt_state == INTERRUPT_NONE) { + fir->interrupt_state = INTERRUPT_WAITING_HANDLER; + while (fir->interrupt_state != INTERRUPT_SQUELCHED) { pthread_cond_wait(&fir->handler_cond, &fir->handler_mutex); } - } + } else + fir->interrupt_state = INTERRUPT_SQUELCHED; } } pthread_mutex_unlock(&fir->handler_mutex); } + GF_ASSERT(intstat_orig == INTERRUPT_NONE || + intstat_orig == INTERRUPT_HANDLED || + intstat_orig == INTERRUPT_SQUELCHED); gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); /* @@ -860,19 +898,29 @@ fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir, }; fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; + GF_ASSERT(intstat == INTERRUPT_HANDLED || intstat == INTERRUPT_SQUELCHED); + pthread_mutex_lock(&fir->handler_mutex); { intstat_orig = fir->interrupt_state; - if (fir->interrupt_state == INTERRUPT_NONE) { - fir->interrupt_state = intstat; - if (sync) { + switch (intstat_orig) { + case INTERRUPT_NONE: + fir->interrupt_state = intstat; + break; + case INTERRUPT_WAITING_HANDLER: + fir->interrupt_state = INTERRUPT_SQUELCHED; pthread_cond_signal(&fir->handler_cond); - } + break; + default: + break; } finh = fir->fuse_in_header; } pthread_mutex_unlock(&fir->handler_mutex); + GF_ASSERT(intstat_orig == INTERRUPT_NONE || + (sync && intstat_orig == INTERRUPT_WAITING_HANDLER) || + (!sync && intstat_orig == INTERRUPT_SQUELCHED)); gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); /* @@ -2185,7 +2233,6 @@ 
fuse_mknod(xlator_t *this, fuse_in_header_t *finh, void *msg, fuse_state_t *state = NULL; #if FUSE_KERNEL_MINOR_VERSION >= 12 fuse_private_t *priv = NULL; - int32_t ret = -1; priv = this->private; if (priv->proto_minor < 12) @@ -2257,7 +2304,6 @@ fuse_mkdir(xlator_t *this, fuse_in_header_t *finh, void *msg, char *name = (char *)(fmi + 1); #if FUSE_KERNEL_MINOR_VERSION >= 12 fuse_private_t *priv = NULL; - int32_t ret = -1; #endif fuse_state_t *state; @@ -2783,7 +2829,6 @@ fuse_create(xlator_t *this, fuse_in_header_t *finh, void *msg, #if FUSE_KERNEL_MINOR_VERSION >= 12 struct fuse_create_in *fci = msg; fuse_private_t *priv = NULL; - int32_t ret = -1; #else struct fuse_open_in *fci = msg; #endif @@ -3381,6 +3426,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, gf_log("glusterfs-fuse", GF_LOG_TRACE, "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); + fd_close(state->fd); + fuse_fd_ctx_destroy(this, state->fd); fd_unref(fd); @@ -4690,12 +4737,10 @@ fuse_setlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, fuse_state_t *state = NULL; int ret = 0; - ret = fuse_interrupt_finish_fop(frame, this, _gf_false, (void **)&state); - if (state) { - GF_FREE(state->name); - dict_unref(state->xdata); - GF_FREE(state); - } + ret = fuse_interrupt_finish_fop(frame, this, _gf_true, (void **)&state); + GF_FREE(state->name); + dict_unref(state->xdata); + GF_FREE(state); if (ret) { return 0; } @@ -4751,28 +4796,12 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, int32_t op_errno, dict_t *dict, dict_t *xdata) { fuse_interrupt_state_t intstat = INTERRUPT_NONE; - fuse_interrupt_record_t *fir; - fuse_state_t *state = NULL; - int ret = 0; - - ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir); - if (ret < 0) { - gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found"); - - goto out; - } + fuse_interrupt_record_t *fir = cookie; intstat = op_ret >= 0 ? 
INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; - fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_false, - (void **)&state); - if (state) { - GF_FREE(state->name); - dict_unref(state->xdata); - GF_FREE(state); - } + fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_true, NULL); -out: STACK_DESTROY(frame->root); return 0; @@ -4810,9 +4839,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) frame->op = GF_FOP_GETXATTR; state->name = xattr_name; - STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, - state->active_subvol->fops->fgetxattr, state->fd, xattr_name, - state->xdata); + STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir, + state->active_subvol, + state->active_subvol->fops->fgetxattr, state->fd, + xattr_name, state->xdata); return; @@ -4835,15 +4865,9 @@ fuse_setlk_resume(fuse_state_t *state) fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler); state_clone = gf_memdup(state, sizeof(*state)); if (state_clone) { - /* - * Calling this allocator with fir casted to (char *) seems like - * an abuse of this API, but in fact the API is stupid to assume - * a (char *) argument (in the funcion it's casted to (void *) - * anyway). 
- */ - state_clone->xdata = dict_for_key_value( - "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true); + state_clone->xdata = dict_new(); } + if (!fir || !state_clone || !state_clone->xdata) { if (fir) { GF_FREE(fir); @@ -4901,7 +4925,7 @@ fuse_setlk(xlator_t *this, fuse_in_header_t *finh, void *msg, return; } -#if FUSE_KERNEL_MINOR_VERSION >= 11 +#if FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS) static void * notify_kernel_loop(void *data) { @@ -5148,6 +5172,7 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg, priv->timed_response_fuse_thread_started = _gf_true; /* Used for 'reverse invalidation of inode' */ +#ifdef HAVE_FUSE_NOTIFICATIONS if (fini->minor >= 12) { ret = gf_thread_create(&messenger, NULL, notify_kernel_loop, this, "fusenoti"); @@ -5159,7 +5184,9 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg, goto out; } priv->reverse_fuse_thread_started = _gf_true; - } else { + } else +#endif + { /* * FUSE minor < 12 does not implement invalidate notifications. 
* This mechanism is required for fopen-keep-cache to operate @@ -5586,6 +5613,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, char create_in_progress = 0; fuse_fd_ctx_t *basefd_ctx = NULL; fd_t *oldfd = NULL; + dict_t *xdata = NULL; basefd_ctx = fuse_fd_ctx_get(this, basefd); GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); @@ -5622,10 +5650,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, } if (oldfd->inode->table->xl == old_subvol) { - if (IA_ISDIR(oldfd->inode->ia_type)) + if (IA_ISDIR(oldfd->inode->ia_type)) { ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); - else - ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL); + } else { + xdata = dict_new(); + if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "last-fsync set failed (%s) on fd (%p)" + "(basefd:%p basefd-inode.gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + strerror(ENOMEM), oldfd, basefd, + uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + } + + ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); + } if (ret < 0) { gf_log("glusterfs-fuse", GF_LOG_WARNING, @@ -5680,6 +5721,9 @@ out: fd_unref(oldfd); + if (xdata) + dict_unref(xdata); + return ret; } @@ -5855,7 +5899,9 @@ fuse_graph_sync(xlator_t *this) new_graph_id = priv->next_graph->id; priv->next_graph = NULL; need_first_lookup = 1; - priv->handle_graph_switch = _gf_true; + if (old_subvol) { + priv->handle_graph_switch = _gf_true; + } while (!priv->event_recvd) { ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex); @@ -5891,13 +5937,6 @@ unlock: if (winds_on_old_subvol == 0) { xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL); } - } else { - pthread_mutex_lock(&priv->sync_mutex); - { - priv->handle_graph_switch = _gf_false; - pthread_cond_broadcast(&priv->migrate_cond); - } - 
pthread_mutex_unlock(&priv->sync_mutex); } return 0; @@ -6284,14 +6323,12 @@ out: int dump_history_fuse(circular_buffer_t *cb, void *data) { - char timestr[256] = { + char timestr[GF_TIMESTR_SIZE] = { 0, }; - gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_F_HMS); + gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_F_HMS); - snprintf(timestr + strlen(timestr), 256 - strlen(timestr), - ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec); gf_proc_dump_write("TIME", "%s", timestr); gf_proc_dump_write("message", "%s\n", (char *)cb->data); @@ -6374,6 +6411,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) fuse_private_t *private = NULL; gf_boolean_t start_thread = _gf_false; glusterfs_graph_t *graph = NULL; + struct pollfd pfd = {0}; private = this->private; @@ -6441,6 +6479,32 @@ notify(xlator_t *this, int32_t event, void *data, ...) /* Authentication failure is an error and glusterfs should stop */ gf_log(this->name, GF_LOG_ERROR, "Server authenication failed. Shutting down."); + pthread_mutex_lock(&private->sync_mutex); + { + /*Wait for mount to finish*/ + if (!private->mount_finished) { + pfd.fd = private->status_pipe[0]; + pfd.events = POLLIN | POLLHUP | POLLERR; + if (poll(&pfd, 1, -1) < 0) { + gf_log(this->name, GF_LOG_ERROR, "poll error %s", + strerror(errno)); + goto auth_fail_unlock; + } + if (pfd.revents & POLLIN) { + if (fuse_get_mount_status(this) != 0) { + goto auth_fail_unlock; + } + private + ->mount_finished = _gf_true; + } else if (pfd.revents) { + gf_log(this->name, GF_LOG_ERROR, + "mount pipe closed without status"); + goto auth_fail_unlock; + } + } + } + auth_fail_unlock: + pthread_mutex_unlock(&private->sync_mutex); fini(this); break; } @@ -6644,6 +6708,8 @@ init(xlator_t *this_xl) INIT_LIST_HEAD(&priv->interrupt_list); pthread_mutex_init(&priv->interrupt_mutex, NULL); + pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL); + /* get options from option dictionary */ ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, 
&value_string); if (ret == -1 || value_string == NULL) { diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index f4c6ad8f2fb..4cb94c23cad 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t; typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg, struct iobuf *iobuf); +enum fusedev_errno { + FUSEDEV_ENOENT, + FUSEDEV_ENOTDIR, + FUSEDEV_ENODEV, + FUSEDEV_EPERM, + FUSEDEV_ENOMEM, + FUSEDEV_ENOTCONN, + FUSEDEV_ECONNREFUSED, + FUSEDEV_EOVERFLOW, + FUSEDEV_EBUSY, + FUSEDEV_ENOTEMPTY, + FUSEDEV_EMAXPLUS +}; + struct fuse_private { int fd; uint32_t proto_minor; @@ -193,6 +207,10 @@ struct fuse_private { uint32_t lru_limit; uint32_t invalidate_limit; uint32_t fuse_dev_eperm_ratelimit_ns; + + /* counters for fusdev errnos */ + uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS]; + pthread_mutex_t fusedev_errno_cnt_mutex; }; typedef struct fuse_private fuse_private_t; @@ -226,6 +244,7 @@ enum fuse_interrupt_state { INTERRUPT_NONE, INTERRUPT_SQUELCHED, INTERRUPT_HANDLED, + INTERRUPT_WAITING_HANDLER, }; typedef enum fuse_interrupt_state fuse_interrupt_state_t; struct fuse_interrupt_record; @@ -341,30 +360,6 @@ typedef struct fuse_graph_switch_args fuse_graph_switch_args_t; return; \ } \ state->umask = fci->umask; \ - \ - /* TODO: remove this after 3.4.0 release. keeping it for the \ - sake of backward compatibility with old (3.3.[01]) \ - releases till then. 
*/ \ - ret = dict_set_int16(state->xdata, "umask", fci->umask); \ - if (ret < 0) { \ - gf_log("glusterfs-fuse", GF_LOG_WARNING, \ - "%s Failed adding umask" \ - " to request", \ - op); \ - send_fuse_err(this, finh, ENOMEM); \ - free_fuse_state(state); \ - return; \ - } \ - ret = dict_set_int16(state->xdata, "mode", fci->mode); \ - if (ret < 0) { \ - gf_log("glusterfs-fuse", GF_LOG_WARNING, \ - "%s Failed adding mode " \ - "to request", \ - op); \ - send_fuse_err(this, finh, ENOMEM); \ - free_fuse_state(state); \ - return; \ - } \ } \ } while (0) diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c index fd11f2ba652..a2b0ad11fe4 100644 --- a/xlators/mount/fuse/src/fuse-helpers.c +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh) return state; } -#define FUSE_MAX_AUX_GROUPS \ - 32 /* We can get only up to 32 aux groups from /proc */ void frame_fill_groups(call_frame_t *frame) { @@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame) char filename[32]; char line[4096]; char *ptr = NULL; - FILE *fp = NULL; - int idx = 0; long int id = 0; char *saveptr = NULL; char *endptr = NULL; @@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame) call_stack_set_groups(frame->root, ngroups, &mygroups); } else { + FILE *fp = NULL; + ret = snprintf(filename, sizeof filename, "/proc/%d/status", frame->root->pid); - if (ret >= sizeof filename) + if (ret >= sizeof filename) { + gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); goto out; + } fp = fopen(filename, "r"); - if (!fp) + if (!fp) { + gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, + strerror(errno)); goto out; + } - if (call_stack_alloc_groups(frame->root, ngroups) != 0) - goto out; + for (;;) { + gf_boolean_t found_groups = _gf_false; + int idx = 0; - while ((ptr = fgets(line, sizeof line, fp))) { - if (strncmp(ptr, "Groups:", 7) != 0) - continue; + if 
(call_stack_alloc_groups(frame->root, ngroups) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to allocate gid buffer"); + goto out; + } + while ((ptr = fgets(line, sizeof line, fp))) { + if (strncmp(ptr, "Groups:", 7) == 0) { + found_groups = _gf_true; + break; + } + } + if (!found_groups) { + gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", + filename); + break; + } ptr = line + 8; for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; ptr = strtok_r(NULL, " \t\r\n", &saveptr)) { errno = 0; id = strtol(ptr, &endptr, 0); - if (errno == ERANGE) - break; - if (!endptr || *endptr) + if (errno == ERANGE || !endptr || *endptr) { + gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", + filename); break; - frame->root->groups[idx++] = id; - if (idx == FUSE_MAX_AUX_GROUPS) + } + if (idx < call_stack_groups_capacity(frame->root)) + frame->root->groups[idx] = id; + idx++; + if (idx == GF_MAX_AUX_GROUPS) break; } - - frame->root->ngrps = idx; - break; + if (idx > call_stack_groups_capacity(frame->root)) { + ngroups = idx; + rewind(fp); + } else { + frame->root->ngrps = idx; + break; + } } + out: + if (fp) + fclose(fp); } - -out: - if (fp) - fclose(fp); #elif defined(GF_SOLARIS_HOST_OS) char filename[32]; char scratch[128]; @@ -245,7 +268,7 @@ out: fp = fopen(filename, "r"); if (fp != NULL) { if (fgets(scratch, sizeof scratch, fp) != NULL) { - ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS); + ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); if (call_stack_alloc_groups(frame->root, ngrps) != 0) { fclose(fp); return; diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in index d43fc97d084..3a5feb606d7 100755 --- a/xlators/mount/fuse/utils/mount_glusterfs.in +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -469,6 +469,7 @@ parse_options() main () { +#if !defined(__FreeBSD__) ## `mount` on OSX specifies options as first argument echo $1|grep -q -- "-o" if [ $? 
-eq 0 ]; then @@ -478,7 +479,7 @@ main () volfile_loc=$1 mount_point=$2 fi - +#endif /* __FreeBSD__ */ while getopts "Vo:h" opt; do case "${opt}" in o) @@ -499,6 +500,12 @@ main () esac done +#ifdef __FreeBSD__ + shift $((OPTIND - 1)) + volfile_loc="$1" + mount_point="$2" +#endif /* __FreeBSD__ */ + [ -r "$volfile_loc" ] || { # '%' included to support ipv6 link local addresses server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p'); diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 50cd82dd422..7e3bbf16086 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -571,6 +571,12 @@ acl3_setacl_resume(void *carg) acl3_check_fh_resolve_status(cs, stat, acl3err); nfs_request_user_init(&nfu, cs->req); xattr = dict_new(); + if (xattr == NULL) { + gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); + goto acl3err; + } + if (cs->aclcount) ret = dict_set_static_bin(xattr, POSIX_ACL_ACCESS_XATTR, cs->aclxattr, posix_acl_xattr_size(cs->aclcount)); @@ -724,7 +730,6 @@ acl3svc_init(xlator_t *nfsx) struct nfs_state *nfs = NULL; dict_t *options = NULL; int ret = -1; - char *portstr = NULL; static gf_boolean_t acl3_inited = _gf_false; /* Already inited */ @@ -742,12 +747,13 @@ acl3svc_init(xlator_t *nfsx) acl3prog.private = ns; options = dict_new(); - - ret = gf_asprintf(&portstr, "%d", GF_ACL3_PORT); - if (ret == -1) + if (options == NULL) { + gf_msg(GF_ACL, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); goto err; + } - ret = dict_set_dynstr(options, "transport.socket.listen-port", portstr); + ret = dict_set_str(options, "transport.socket.listen-port", GF_ACL3_PORT); if (ret == -1) goto err; ret = dict_set_str(options, "transport-type", "socket"); @@ -783,7 +789,6 @@ acl3svc_init(xlator_t *nfsx) if (ret == -1) { gf_msg(GF_ACL, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL, "Unable to create listeners"); - 
dict_unref(options); goto err; } diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h index dead04b4273..762fbb04a0f 100644 --- a/xlators/nfs/server/src/acl3.h +++ b/xlators/nfs/server/src/acl3.h @@ -18,7 +18,7 @@ #define ACL3_SETACL 2 #define ACL3_PROC_COUNT 3 -#define GF_ACL3_PORT 38469 +#define GF_ACL3_PORT "38469" #define GF_ACL GF_NFS "-ACL" /* Flags for the getacl/setacl mode */ diff --git a/xlators/nfs/server/src/auth-cache.c b/xlators/nfs/server/src/auth-cache.c index 64768646074..ffbf5b6cad6 100644 --- a/xlators/nfs/server/src/auth-cache.c +++ b/xlators/nfs/server/src/auth-cache.c @@ -189,7 +189,7 @@ out: static int _auth_cache_expired(struct auth_cache *cache, struct auth_cache_entry *entry) { - return ((time(NULL) - entry->timestamp) > cache->ttl_sec); + return ((gf_time() - entry->timestamp) > cache->ttl_sec); } /** @@ -474,7 +474,7 @@ cache_nfs_fh(struct auth_cache *cache, struct nfs3_fh *fh, goto out; } - entry->timestamp = time(NULL); + entry->timestamp = gf_time(); /* Update entry->item if it is pointing to a different export_item */ if (entry->item && entry->item != export_item) { GF_REF_PUT(entry->item); diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c index 88d3002a752..a34d9104c17 100644 --- a/xlators/nfs/server/src/mount3.c +++ b/xlators/nfs/server/src/mount3.c @@ -4062,6 +4062,11 @@ mnt3svc_init(xlator_t *nfsx) mnt3prog.private = mstate; options = dict_new(); + if (options == NULL) { + gf_msg(GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); + goto err; + } ret = gf_asprintf(&portstr, "%d", GF_MOUNTV3_PORT); if (ret == -1) @@ -4097,7 +4102,6 @@ mnt3svc_init(xlator_t *nfsx) if (ret == -1) { gf_msg(GF_NFS, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL, "Unable to create listeners"); - dict_unref(options); goto err; } @@ -4162,6 +4166,11 @@ mnt1svc_init(xlator_t *nfsx) mnt1prog.private = mstate; options = dict_new(); + if (options == NULL) { + 
gf_msg(GF_NFS, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); + goto err; + } ret = gf_asprintf(&portstr, "%d", GF_MOUNTV1_PORT); if (ret == -1) diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c index 0688779eb65..1a2b0f85453 100644 --- a/xlators/nfs/server/src/mount3udp_svc.c +++ b/xlators/nfs/server/src/mount3udp_svc.c @@ -216,7 +216,7 @@ mount3udp_thread(void *argv) GF_ASSERT(nfsx); - glusterfs_this_set(nfsx); + THIS = nfsx; transp = svcudp_create(RPC_ANYSOCK); if (transp == NULL) { diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index cab57bbf3c9..39b73f88ac3 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -1157,7 +1157,7 @@ out: return ret; } -int +static int nfs_reconfigure_state(xlator_t *this, dict_t *options) { int ret = 0; @@ -1167,8 +1167,8 @@ nfs_reconfigure_state(xlator_t *this, dict_t *options) gf_boolean_t optbool; uint32_t optuint32; struct nfs_state *nfs = NULL; - char *blacklist_keys[] = {"nfs.port", "nfs.transport-type", - "nfs.mem-factor", NULL}; + static char *options_require_restart[] = {"nfs.port", "nfs.transport-type", + "nfs.mem-factor", NULL}; GF_VALIDATE_OR_GOTO(GF_NFS, this, out); GF_VALIDATE_OR_GOTO(GF_NFS, this->private, out); @@ -1176,14 +1176,14 @@ nfs_reconfigure_state(xlator_t *this, dict_t *options) nfs = (struct nfs_state *)this->private; - /* Black listed options can't be reconfigured, they need + /* Some listed options can't be reconfigured, they need * NFS to be restarted. There are two cases 1. SET 2. UNSET. * 1. 
SET */ - while (blacklist_keys[keyindx]) { - if (dict_get(options, blacklist_keys[keyindx])) { + while (options_require_restart[keyindx]) { + if (dict_get(options, options_require_restart[keyindx])) { gf_msg(GF_NFS, GF_LOG_ERROR, 0, NFS_MSG_RECONFIG_FAIL, "Reconfiguring %s needs NFS restart", - blacklist_keys[keyindx]); + options_require_restart[keyindx]); goto out; } keyindx++; diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index 8a58977b53c..897fb42b071 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -1072,7 +1072,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode) if (sattr->atime.set_it == SET_TO_SERVER_TIME) { valid |= GF_SET_ATTR_ATIME; if (buf) - buf->ia_atime = time(NULL); + buf->ia_atime = gf_time(); } if (sattr->mtime.set_it == SET_TO_CLIENT_TIME) { @@ -1084,7 +1084,7 @@ nfs3_sattr3_to_setattr_valid(sattr3 *sattr, struct iatt *buf, mode_t *omode) if (sattr->mtime.set_it == SET_TO_SERVER_TIME) { valid |= GF_SET_ATTR_MTIME; if (buf) - buf->ia_mtime = time(NULL); + buf->ia_mtime = gf_time(); } return valid; diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c index 7cfd75f9ed1..f9042bc3b3f 100644 --- a/xlators/nfs/server/src/nfs3.c +++ b/xlators/nfs/server/src/nfs3.c @@ -5651,7 +5651,7 @@ nfs3_init_state(xlator_t *nfsx) goto free_localpool; } - nfs3->serverstart = (uint64_t)time(NULL); + nfs3->serverstart = (uint64_t)gf_time(); INIT_LIST_HEAD(&nfs3->fdlru); LOCK_INIT(&nfs3->fdlrulock); nfs3->fdcount = 0; diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index ef65d7f55f8..577e8543966 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -1011,7 +1011,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame) int port = -1; struct nlm4_notify_args *ncf = NULL; - glusterfs_this_set(cs->nfsx); + GF_ASSERT(cs->nfsx); + THIS = cs->nfsx; 
rpc_transport_get_peeraddr(cs->trans, NULL, 0, &sock_union.storage, sizeof(sock_union.storage)); @@ -1054,6 +1055,12 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame) } options = dict_new(); + if (options == NULL) { + gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); + goto err; + } + ret = dict_set_str(options, "transport-type", "socket"); if (ret == -1) { gf_msg(GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_DICT_SET_FAILED, @@ -2592,6 +2599,11 @@ nlm4svc_init(xlator_t *nfsx) nlm4prog.private = ns; options = dict_new(); + if (options == NULL) { + gf_msg(GF_NLM, GF_LOG_ERROR, ENOMEM, NFS_MSG_GFID_DICT_CREATE_FAIL, + "dict allocation failed"); + goto err; + } ret = gf_asprintf(&portstr, "%d", GF_NLM4_PORT); if (ret == -1) @@ -2633,7 +2645,6 @@ nlm4svc_init(xlator_t *nfsx) if (ret == -1) { gf_msg(GF_NLM, GF_LOG_ERROR, errno, NFS_MSG_LISTENERS_CREATE_FAIL, "Unable to create listeners"); - dict_unref(options); goto err; } INIT_LIST_HEAD(&nlm_client_list); @@ -2704,7 +2715,7 @@ nlm4svc_init(xlator_t *nfsx) goto err; } - (void)gf_thread_create(&thr, NULL, nsm_thread, (void *)NULL, "nfsnsm"); + (void)gf_thread_create(&thr, NULL, nsm_thread, nfsx, "nfsnsm"); timeout.tv_sec = nlm_grace_period; timeout.tv_nsec = 0; diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c index d18b86ce8db..eaa7b916190 100644 --- a/xlators/nfs/server/src/nlmcbk_svc.c +++ b/xlators/nfs/server/src/nlmcbk_svc.c @@ -84,9 +84,14 @@ nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp) void * nsm_thread(void *argv) { + xlator_t *nfsx = argv; register SVCXPRT *transp; int ret = 0; + GF_ASSERT(nfsx); + + THIS = nfsx; + ret = pmap_unset(NLMCBK_PROGRAM, NLMCBK_V1); if (ret == 0) { gf_msg(GF_NLM, GF_LOG_ERROR, 0, NFS_MSG_PMAP_UNSET_FAIL, diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index c007e0a355d..9375d29c17f 100644 --- 
a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -133,23 +133,17 @@ ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode, return 0; } -int32_t +static gf_boolean_t ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) { - int8_t need_revalidate = 0; - struct timeval tv = { - 0, - }; ioc_table_t *table = NULL; + GF_ASSERT(ioc_inode); table = ioc_inode->table; + GF_ASSERT(table); - gettimeofday(&tv, NULL); - - if (time_elapsed(&tv, &ioc_inode->cache.tv) >= table->cache_timeout) - need_revalidate = 1; - - return need_revalidate; + return (gf_time() - ioc_inode->cache.last_revalidate >= + table->cache_timeout); } /* @@ -411,9 +405,6 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ioc_inode_t *ioc_inode = NULL; size_t destroy_size = 0; struct iatt *local_stbuf = NULL; - struct timeval tv = { - 0, - }; local = frame->local; ioc_inode = local->inode; @@ -451,10 +442,9 @@ ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) local_stbuf = NULL; - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1405,9 +1395,6 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; - struct timeval tv = { - 0, - }; inode_ctx_get(fd->inode, this, &tmp_inode); ioc_inode = (ioc_inode_t *)(long)tmp_inode; @@ -1418,10 +1405,9 @@ ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, return 0; } - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); } ioc_inode_unlock(ioc_inode); @@ -1945,7 +1931,7 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) char key[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char timestr[256] = { + char 
timestr[GF_TIMESTR_SIZE] = { 0, }; @@ -1955,11 +1941,9 @@ __ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - if (ioc_inode->cache.tv.tv_sec) { - gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.tv.tv_sec, + if (ioc_inode->cache.last_revalidate) { + gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate, gf_timefmt_FT); - snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr), - ".%" GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); gf_proc_dump_write("last-cache-validation-time", "%s", timestr); } diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 4303c2fae13..14923c75edc 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -117,15 +117,13 @@ struct ioc_page { struct ioc_cache { rbthash_table_t *page_table; struct list_head page_lru; - time_t mtime; /* - * seconds component of file mtime - */ - time_t mtime_nsec; /* - * nanosecond component of file mtime - */ - struct timeval tv; /* - * time-stamp at last re-validate - */ + time_t mtime; /* + * seconds component of file mtime + */ + time_t mtime_nsec; /* + * nanosecond component of file mtime + */ + time_t last_revalidate; /* timestamp at last re-validate */ }; struct ioc_inode { @@ -270,17 +268,6 @@ ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size, pthread_mutex_unlock(&page->page_lock); \ } while (0) -static inline uint64_t -time_elapsed(struct timeval *now, struct timeval *then) -{ - uint64_t sec = now->tv_sec - then->tv_sec; - - if (sec) - return sec; - - return 0; -} - ioc_inode_t * ioc_inode_search(ioc_table_t *table, inode_t *inode); diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index a8edbde23f2..84b1ae6cb20 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -413,9 +413,6 @@ ioc_fault_cbk(call_frame_t 
*frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_waitq_t *waitq = NULL; size_t iobref_page_size = 0; char zero_filled = 0; - struct timeval tv = { - 0, - }; GF_ASSERT(frame); @@ -431,7 +428,6 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0)); - gettimeofday(&tv, NULL); ioc_inode_lock(ioc_inode); { if (op_ret == -1 || @@ -448,7 +444,7 @@ ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; } - memcpy(&ioc_inode->cache.tv, &tv, sizeof(struct timeval)); + ioc_inode->cache.last_revalidate = gf_time(); if (op_ret < 0) { /* error, readv returned -1 */ diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 6fa4d88389c..3d24cc97f4b 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -1016,16 +1016,13 @@ static uint32_t THRESH_LIMIT = 1209600; /* SECONDS * (EVENTS-1) */ static void iot_apply_event(xlator_t *this, threshold_t *thresh) { - struct timespec now; - time_t delta; + time_t delta, now = gf_time(); /* Refresh for manual testing/debugging. It's cheap. */ THRESH_LIMIT = THRESH_SECONDS * (THRESH_EVENTS - 1); - timespec_now(&now); - if (thresh->value && thresh->update_time) { - delta = now.tv_sec - thresh->update_time; + delta = now - thresh->update_time; /* Be careful about underflow. */ if (thresh->value <= delta) { thresh->value = 0; @@ -1046,7 +1043,7 @@ iot_apply_event(xlator_t *this, threshold_t *thresh) kill(getpid(), SIGTRAP); } - thresh->update_time = now.tv_sec; + thresh->update_time = now; } static void * @@ -1311,7 +1308,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) 
/* Wait for draining stub from queue before notify PARENT_DOWN */ stub_cnt = GF_ATOMIC_GET(conf->stub_cnt); if (stub_cnt) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; pthread_mutex_lock(&conf->mutex); { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 4c76f3089d5..a405be51f02 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ -#include <glusterfs/timespec.h> #include <glusterfs/glusterfs.h> #include <glusterfs/defaults.h> #include <glusterfs/logging.h> @@ -33,8 +32,7 @@ struct mdc_statfs_cache { pthread_mutex_t lock; - gf_boolean_t initialized; - struct timespec last_refreshed; + time_t last_refreshed; /* (time_t)-1 if not yet initialized. */ struct statvfs buf; }; @@ -61,7 +59,7 @@ struct mdc_statistics { }; struct mdc_conf { - int timeout; + uint32_t timeout; gf_boolean_t cache_posix_acl; gf_boolean_t cache_glusterfs_acl; gf_boolean_t cache_selinux; @@ -132,6 +130,7 @@ struct mdc_local { char *key; dict_t *xattr; uint64_t incident_time; + bool update_cache; }; int @@ -375,10 +374,9 @@ unlock: static gf_boolean_t __is_cache_valid(xlator_t *this, time_t mdc_time) { - time_t now = 0; gf_boolean_t ret = _gf_true; struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; time_t last_child_down = 0; conf = this->private; @@ -392,15 +390,13 @@ __is_cache_valid(xlator_t *this, time_t mdc_time) last_child_down = conf->last_child_down; timeout = conf->timeout; - time(&now); - if ((mdc_time == 0) || ((last_child_down != 0) && (mdc_time < last_child_down))) { ret = _gf_false; goto out; } - if (now >= (mdc_time + timeout)) { + if (gf_time() >= (mdc_time + timeout)) { ret = _gf_false; } @@ -580,10 +576,9 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, mdc_from_iatt(mdc, iatt); 
mdc->valid = _gf_true; if (update_time) { - time(&mdc->ia_time); - + mdc->ia_time = gf_time(); if (mdc->xa_time && update_xa_time) - time(&mdc->xa_time); + mdc->xa_time = mdc->ia_time; } gf_msg_callingfn( @@ -784,7 +779,7 @@ mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict) if (newdict) mdc->xattr = newdict; - time(&mdc->xa_time); + mdc->xa_time = gf_time(); gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld", uuid_utoa(inode->gfid), (long long)mdc->xa_time); } @@ -985,7 +980,7 @@ out: return ret; } -void +static bool mdc_load_reqs(xlator_t *this, dict_t *dict) { struct mdc_conf *conf = this->private; @@ -994,6 +989,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) char *tmp = NULL; char *tmp1 = NULL; int ret = 0; + bool loaded = false; tmp1 = conf->mdc_xattr_str; if (!tmp1) @@ -1011,13 +1007,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) conf->mdc_xattr_str = NULL; gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE, "Disabled cache for xattrs, dict_set failed"); + goto out; } pattern = strtok_r(NULL, ",", &tmp); } - GF_FREE(mdc_xattr_str); + loaded = true; + out: - return; + GF_FREE(mdc_xattr_str); + + return loaded; } struct checkpair { @@ -1057,8 +1057,7 @@ mdc_cache_statfs(xlator_t *this, struct statvfs *buf) pthread_mutex_lock(&conf->statfs_cache.lock); { memcpy(&conf->statfs_cache.buf, buf, sizeof(struct statvfs)); - clock_gettime(CLOCK_MONOTONIC, &conf->statfs_cache.last_refreshed); - conf->statfs_cache.initialized = _gf_true; + conf->statfs_cache.last_refreshed = gf_time(); } pthread_mutex_unlock(&conf->statfs_cache.lock); } @@ -1067,8 +1066,7 @@ int mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) { struct mdc_conf *conf = this->private; - struct timespec now; - double cache_age = 0.0; + uint32_t cache_age = 0; int ret = 0; if (!buf || !conf) { @@ -1077,23 +1075,23 @@ mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf) } *buf = NULL; - timespec_now(&now); 
pthread_mutex_lock(&conf->statfs_cache.lock); { - /* Skip if the cache is not initialized */ - if (!conf->statfs_cache.initialized) { + /* Skip if the cache is not initialized. */ + if (conf->statfs_cache.last_refreshed == (time_t)-1) { ret = -1; goto unlock; } - cache_age = (now.tv_sec - conf->statfs_cache.last_refreshed.tv_sec); + cache_age = (gf_time() - conf->statfs_cache.last_refreshed); - gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %lf", cache_age); + gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs", + cache_age); if (cache_age > conf->timeout) { - /* Expire the cache */ + /* Expire the cache. */ gf_log(this->name, GF_LOG_DEBUG, - "Cache age %lf exceeded timeout %d", cache_age, + "Cache age %u secs exceeded timeout %u secs", cache_age, conf->timeout); ret = -1; goto unlock; @@ -1107,6 +1105,31 @@ err: return ret; } +static dict_t * +mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata) +{ + if (xdata != NULL) { + dict_ref(xdata); + } + + if (local == NULL) { + return xdata; + } + + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + local->update_cache = false; + + return NULL; + } + } + + local->update_cache = mdc_load_reqs(this, xdata); + + return xdata; +} + int mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct statvfs *buf, @@ -1189,6 +1212,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; + if (!local) + goto out; + if (op_ret != 0) { if (op_errno == ENOENT) GF_ATOMIC_INC(conf->mdc_counter.negative_lookup); @@ -1206,9 +1232,6 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - if (!local) - goto out; - if (local->loc.parent) { mdc_inode_iatt_set(this, local->loc.parent, postparent, local->incident_time); @@ -1216,7 +1239,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode) { mdc_inode_iatt_set(this, local->loc.inode, stbuf, 
local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, dict); + } } out: MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict, @@ -1235,7 +1260,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) 0, }; dict_t *xattr_rsp = NULL; - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -1286,18 +1310,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, xdata); if (xattr_rsp) dict_unref(xattr_rsp); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1320,7 +1344,9 @@ mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->loc.inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); @@ -1334,7 +1360,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) int ret; struct iatt stbuf; mdc_local_t *local = NULL; - dict_t *xattr_alloc = NULL; struct mdc_conf *conf = this->private; local = mdc_local_get(frame, loc->inode); @@ -1358,17 +1383,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, 
loc, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1391,7 +1415,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time); - mdc_inode_xatt_set(this, local->fd->inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); @@ -1405,14 +1431,13 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) int ret; struct iatt stbuf; mdc_local_t *local = NULL; - dict_t *xattr_alloc = NULL; struct mdc_conf *conf = this->private; local = mdc_local_get(frame, fd->inode); if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); ret = mdc_inode_iatt_get(this, fd->inode, &stbuf); if (ret != 0) @@ -1424,17 +1449,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) return 0; uncached: - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); GF_ATOMIC_INC(conf->mdc_counter.stat_miss); STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; } @@ -1473,8 +1497,9 @@ mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - local->loc.inode = inode_ref(loc->inode); + if (local != NULL) { + local->loc.inode = inode_ref(loc->inode); + } STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); @@ -1517,8 +1542,9 @@ mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = 
fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); @@ -1566,9 +1592,10 @@ mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); @@ -1616,9 +1643,10 @@ mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); @@ -1675,8 +1703,9 @@ mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); @@ -1729,8 +1758,9 @@ mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); + if (local != NULL) { + loc_copy(&local->loc, loc); + } STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata); @@ -1777,13 +1807,22 @@ mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { mdc_local_t *local = NULL; + char *name; + name = gf_strdup(linkname); + if (name == NULL) { + goto 
wind; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name); + goto wind; + } loc_copy(&local->loc, loc); + local->linkname = name; - local->linkname = gf_strdup(linkname); - +wind: STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); return 0; @@ -1841,9 +1880,10 @@ mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); @@ -1892,9 +1932,10 @@ mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, mdc_local_t *local = NULL; local = mdc_local_get(frame, oldloc->inode); - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); + if (local != NULL) { + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + } STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); @@ -1943,9 +1984,10 @@ mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xdata); + } STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, @@ -1992,8 +2034,9 @@ mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, } local = mdc_local_get(frame, loc->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } out: STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this), @@ -2034,8 +2077,9 @@ mdc_readv(call_frame_t *frame, 
xlator_t *this, fd_t *fd, size_t size, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); @@ -2076,8 +2120,9 @@ mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, @@ -2093,15 +2138,14 @@ mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, mdc_local_t *local = NULL; local = frame->local; + if (!local) + goto out; if (op_ret != 0) { mdc_inode_iatt_set(this, local->loc.inode, NULL, local->incident_time); goto out; } - if (!local) - goto out; - mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf, _gf_true, local->incident_time); mdc_inode_xatt_update(this, local->loc.inode, xdata); @@ -2122,6 +2166,9 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, struct mdc_conf *conf = this->private; local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + goto wind; + } loc_copy(&local->loc, loc); @@ -2149,6 +2196,7 @@ mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, } } +wind: STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); @@ -2194,8 +2242,11 @@ mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, struct mdc_conf *conf = this->private; local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + goto wind; + } - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if ((valid & GF_SET_ATTR_MODE) && conf->cache_glusterfs_acl) { if (!xdata) @@ -2221,6 +2272,7 @@ 
mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, } } +wind: STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); @@ -2262,8 +2314,9 @@ mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); @@ -2318,9 +2371,10 @@ mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - local->xattr = dict_ref(xattr); + if (local != NULL) { + loc_copy(&local->loc, loc); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); @@ -2376,9 +2430,10 @@ mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); - - local->fd = fd_ref(fd); - local->xattr = dict_ref(xattr); + if (local != NULL) { + local->fd = __fd_ref(fd); + local->xattr = dict_ref(xattr); + } STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata); @@ -2408,7 +2463,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_set(this, local->loc.inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->loc.inode, xdata); + } out: MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2425,19 +2482,19 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, mdc_local_t *local = NULL; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; - dict_t *xattr_alloc = NULL; - gf_boolean_t key_satisfied 
= _gf_true; + gf_boolean_t key_satisfied = _gf_false; local = mdc_local_get(frame, loc->inode); - if (!local) + if (!local) { goto uncached; + } loc_copy(&local->loc, loc); if (!is_mdc_key_satisfied(this, key)) { - key_satisfied = _gf_false; goto uncached; } + key_satisfied = _gf_true; ret = mdc_inode_xatt_get(this, loc->inode, &xattr); if (ret != 0) @@ -2458,18 +2515,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, uncached: if (key_satisfied) { - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); } GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc, key, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2496,7 +2552,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - mdc_inode_xatt_set(this, local->fd->inode, xdata); + if (local->update_cache) { + mdc_inode_xatt_set(this, local->fd->inode, xdata); + } out: MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); @@ -2513,14 +2571,13 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, dict_t *xattr = NULL; int op_errno = ENODATA; struct mdc_conf *conf = this->private; - dict_t *xattr_alloc = NULL; gf_boolean_t key_satisfied = _gf_true; local = mdc_local_get(frame, fd->inode); if (!local) goto uncached; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if (!is_mdc_key_satisfied(this, key)) { key_satisfied = _gf_false; @@ -2546,18 +2603,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, uncached: if (key_satisfied) { - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); } GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); 
STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (key_satisfied && (xdata != NULL)) { + dict_unref(xdata); + } + return 0; } @@ -2613,12 +2669,21 @@ mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; + + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } local = mdc_local_get(frame, loc->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } loc_copy(&local->loc, loc); - - local->key = gf_strdup(name); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2704,12 +2769,21 @@ mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, int ret = 0; dict_t *xattr = NULL; struct mdc_conf *conf = this->private; + char *name2; - local = mdc_local_get(frame, fd->inode); + name2 = gf_strdup(name); + if (name2 == NULL) { + goto uncached; + } - local->fd = fd_ref(fd); + local = mdc_local_get(frame, fd->inode); + if (local == NULL) { + GF_FREE(name2); + goto uncached; + } - local->key = gf_strdup(name); + local->fd = __fd_ref(fd); + local->key = name2; if (!is_mdc_key_satisfied(this, name)) goto uncached; @@ -2767,27 +2841,23 @@ int mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, loc->inode); - - loc_copy(&local->loc, loc); - - if (!xdata) - xdata = xattr_alloc = dict_new(); - - if (xdata) { - /* Tell readdir-ahead to include these keys in xdata when it - * internally issues readdirp() in it's opendir_cbk */ - mdc_load_reqs(this, xdata); + if (local != NULL) { + loc_copy(&local->loc, loc); } + /* Tell readdir-ahead to include these keys in xdata when it + * internally issues readdirp() in it's opendir_cbk */ + xdata = mdc_prepare_request(this, local, xdata); + STACK_WIND(frame, 
mdc_opendir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + if (xdata != NULL) { + dict_unref(xdata); + } return 0; } @@ -2815,7 +2885,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, continue; mdc_inode_iatt_set(this, entry->inode, &entry->d_stat, local->incident_time); - mdc_inode_xatt_set(this, entry->inode, entry->dict); + if (local->update_cache) { + mdc_inode_xatt_set(this, entry->inode, entry->dict); + } } unwind: @@ -2827,24 +2899,23 @@ int mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; local = mdc_local_get(frame, fd->inode); if (!local) goto out; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); - if (!xdata) - xdata = xattr_alloc = dict_new(); - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (xattr_alloc) - dict_unref(xattr_alloc); + + if (xdata != NULL) { + dict_unref(xdata); + } + return 0; out: MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); @@ -2875,7 +2946,6 @@ int mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - int need_unref = 0; mdc_local_t *local = NULL; struct mdc_conf *conf = this->private; @@ -2883,7 +2953,7 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); if (!conf->force_readdirp) { STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this), @@ -2891,19 +2961,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } - if (!xdata) { - xdata = dict_new(); - need_unref = 1; - } - - if (xdata) - mdc_load_reqs(this, xdata); + xdata = mdc_prepare_request(this, 
local, xdata); STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - if (need_unref && xdata) + if (xdata != NULL) { dict_unref(xdata); + } return 0; unwind: @@ -2945,7 +3010,9 @@ mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, @@ -2987,7 +3054,9 @@ mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); @@ -3028,7 +3097,9 @@ mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, mdc_local_t *local; local = mdc_local_get(frame, fd->inode); - local->fd = fd_ref(fd); + if (local != NULL) { + local->fd = __fd_ref(fd); + } STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); @@ -3110,7 +3181,7 @@ mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, if (!local) goto unwind; - local->fd = fd_ref(fd); + local->fd = __fd_ref(fd); STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata); @@ -3483,7 +3554,12 @@ mdc_register_xattr_inval(xlator_t *this) goto out; } - mdc_load_reqs(this, xattr); + if (!mdc_load_reqs(this, xattr)) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY, + "failed to populate cache entries"); + ret = -1; + goto out; + } frame = create_frame(this, this->ctx->pool); if (!frame) { @@ -3532,7 +3608,7 @@ int mdc_reconfigure(xlator_t *this, dict_t *options) { struct mdc_conf 
*conf = NULL; - int timeout = 0; + int timeout = 0, ret = 0; char *tmp_str = NULL; conf = this->private; @@ -3572,7 +3648,10 @@ mdc_reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out); GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out); - mdc_xattr_list_populate(conf, tmp_str); + + ret = mdc_xattr_list_populate(conf, tmp_str); + if (ret < 0) + goto out; /* If timeout is greater than 60s (default before the patch that added * cache invalidation support was added) then, cache invalidation @@ -3585,25 +3664,22 @@ mdc_reconfigure(xlator_t *this, dict_t *options) } conf->timeout = timeout; - (void)mdc_register_xattr_inval(this); + ret = mdc_register_xattr_inval(this); out: - return 0; + return ret; } int32_t mdc_mem_acct_init(xlator_t *this) { - int ret = -1; - - ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1); - return ret; + return xlator_mem_acct_init(this, gf_mdc_mt_end + 1); } int mdc_init(xlator_t *this) { struct mdc_conf *conf = NULL; - int timeout = 0; + uint32_t timeout = 0; char *tmp_str = NULL; conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t); @@ -3615,7 +3691,7 @@ mdc_init(xlator_t *this) LOCK_INIT(&conf->lock); - GF_OPTION_INIT("md-cache-timeout", timeout, int32, out); + GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out); GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out); @@ -3649,7 +3725,9 @@ mdc_init(xlator_t *this) GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out); mdc_xattr_list_populate(conf, tmp_str); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); + conf->statfs_cache.last_refreshed = (time_t)-1; + /* initialize gf_atomic_t counters */ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0); GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0); @@ -3680,7 +3758,7 @@ out: } void -mdc_update_child_down_time(xlator_t *this, time_t *now) +mdc_update_child_down_time(xlator_t *this, time_t now) { struct mdc_conf *conf = NULL; @@ 
-3688,7 +3766,7 @@ mdc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); } @@ -3698,14 +3776,12 @@ mdc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; struct mdc_conf *conf = NULL; - time_t now = 0; conf = this->private; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - mdc_update_child_down_time(this, &now); + mdc_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->mdc_invalidation) diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c index 03dedf8ea08..29b99b5b8ea 100644 --- a/xlators/performance/nl-cache/src/nl-cache-helper.c +++ b/xlators/performance/nl-cache/src/nl-cache-helper.c @@ -113,7 +113,7 @@ out: } void -nlc_update_child_down_time(xlator_t *this, time_t *now) +nlc_update_child_down_time(xlator_t *this, time_t now) { nlc_conf_t *conf = NULL; @@ -121,7 +121,7 @@ nlc_update_child_down_time(xlator_t *this, time_t *now) LOCK(&conf->lock); { - conf->last_child_down = *now; + conf->last_child_down = now; } UNLOCK(&conf->lock); @@ -262,7 +262,7 @@ nlc_init_invalid_ctx(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) if (nlc_ctx->timer) { gf_tw_mod_timer_pending(conf->timer_wheel, nlc_ctx->timer, conf->cache_timeout); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); goto unlock; } @@ -496,7 +496,7 @@ __nlc_inode_ctx_timer_start(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx) nlc_ctx->timer_data = tmp; gf_tw_add_timer(conf->timer_wheel, timer); - time(&nlc_ctx->cache_time); + nlc_ctx->cache_time = gf_time(); gf_msg_trace(this->name, 0, "Registering timer:%p, inode:%p, " "gfid:%s", diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c index cd0e1d195fd..33a7c471663 100644 --- a/xlators/performance/nl-cache/src/nl-cache.c +++ 
b/xlators/performance/nl-cache/src/nl-cache.c @@ -520,15 +520,13 @@ int nlc_notify(xlator_t *this, int event, void *data, ...) { int ret = 0; - time_t now = 0; switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: case GF_EVENT_CHILD_UP: case GF_EVENT_SOME_DESCENDENT_UP: - time(&now); - nlc_update_child_down_time(this, &now); + nlc_update_child_down_time(this, gf_time()); /* TODO: nlc_clear_all_cache (this); else lru prune will lazily clear it*/ break; @@ -731,7 +729,7 @@ nlc_init(xlator_t *this) GF_ATOMIC_INIT(conf->nlc_counter.nlc_invals, 0); INIT_LIST_HEAD(&conf->lru); - time(&conf->last_child_down); + conf->last_child_down = gf_time(); conf->timer_wheel = glusterfs_ctx_tw_get(this->ctx); if (!conf->timer_wheel) { diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h index 8b09972bb09..85fcc176342 100644 --- a/xlators/performance/nl-cache/src/nl-cache.h +++ b/xlators/performance/nl-cache/src/nl-cache.h @@ -155,7 +155,7 @@ nlc_local_init(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, loc_t *loc, loc_t *loc2); void -nlc_update_child_down_time(xlator_t *this, time_t *now); +nlc_update_child_down_time(xlator_t *this, time_t now); void nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason); diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h index f25082433f8..0e789177684 100644 --- a/xlators/performance/open-behind/src/open-behind-messages.h +++ b/xlators/performance/open-behind/src/open-behind-messages.h @@ -23,6 +23,10 @@ */ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED, - OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY); + OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY, + OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE); + +#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop" +#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state" 
#endif /* _OPEN_BEHIND_MESSAGES_H_ */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index cbe89ec82e8..600c3b62ffe 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -16,6 +16,18 @@ #include "open-behind-messages.h" #include <glusterfs/glusterfs-acl.h> +/* Note: The initial design of open-behind was made to cover the simple case + * of open, read, close for small files. This pattern combined with + * quick-read can do the whole operation without a single request to the + * bricks (except the initial lookup). + * + * The way to do this has been improved, but the logic remains the same. + * Basically, this means that any operation sent to the fd or the inode + * that is not a read, causes the open request to be sent to the + * bricks, and all future operations will be executed synchronously, + * including opens (it's reset once all fd's are closed). + */ + typedef struct ob_conf { gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe e.g - fstat() readv() @@ -32,1096 +44,811 @@ typedef struct ob_conf { */ } ob_conf_t; -typedef struct ob_inode { - inode_t *inode; - struct list_head resume_fops; - struct list_head ob_fds; - int count; - int op_ret; - int op_errno; - gf_boolean_t open_in_progress; - int unlinked; -} ob_inode_t; +/* A negative state represents an errno value negated. In this case the + * current operation cannot be processed. */ +typedef enum _ob_state { + /* There are no opens on the inode or the first open is already + * completed. The current operation can be sent directly. 
*/ + OB_STATE_READY = 0, -typedef struct ob_fd { - call_frame_t *open_frame; - loc_t loc; - dict_t *xdata; - int flags; - int op_errno; - ob_inode_t *ob_inode; - fd_t *fd; - gf_boolean_t opened; - gf_boolean_t ob_inode_fops_waiting; - struct list_head list; - struct list_head ob_fds_on_inode; -} ob_fd_t; + /* There's an open pending and it has been triggered. The current + * operation should be "stubbified" and processed with + * ob_stub_dispatch(). */ + OB_STATE_OPEN_TRIGGERED, -ob_inode_t * -ob_inode_alloc(inode_t *inode) -{ - ob_inode_t *ob_inode = NULL; + /* There's an open pending but it has not been triggered. The current + * operation can be processed directly but using an anonymous fd. */ + OB_STATE_OPEN_PENDING, - ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); - if (ob_inode == NULL) - goto out; + /* The current operation is the first open on the inode. */ + OB_STATE_FIRST_OPEN +} ob_state_t; - ob_inode->inode = inode; - INIT_LIST_HEAD(&ob_inode->resume_fops); - INIT_LIST_HEAD(&ob_inode->ob_fds); -out: - return ob_inode; -} - -void -ob_inode_free(ob_inode_t *ob_inode) -{ - if (ob_inode == NULL) - goto out; +typedef struct ob_inode { + /* List of stubs pending on the first open. Once the first open is + * complete, all these stubs will be resubmitted, and dependencies + * will be checked again. */ + struct list_head resume_fops; - list_del_init(&ob_inode->resume_fops); - list_del_init(&ob_inode->ob_fds); + /* The inode this object references. */ + inode_t *inode; - GF_FREE(ob_inode); -out: - return; -} + /* The fd from the first open sent to this inode. It will be set + * from the moment the open is processed until the open is fully + * executed or closed before actually opened. It's NULL in all + * other cases. */ + fd_t *first_fd; + + /* The stub from the first open operation. When open fop starts + * being processed, it's assigned the OB_OPEN_PREPARING value + * until the actual stub is created. 
This is necessary to avoid + * creating the stub inside a locked region. Once the stub is + * successfully created, it's assigned here. This value is set + * to NULL once the stub is resumed. */ + call_stub_t *first_open; + + /* The total number of currently open fd's on this inode. */ + int32_t open_count; + + /* This flag is set as soon as we know that the open will be + * sent to the bricks, even before the stub is ready. */ + bool triggered; +} ob_inode_t; -ob_inode_t * -ob_inode_get(xlator_t *this, inode_t *inode) +/* Dummy pointer used temporarily while the actual open stub is being created */ +#define OB_OPEN_PREPARING ((call_stub_t *)-1) + +#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...) \ + case OB_STATE_FIRST_OPEN: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ + "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ + default_##_fop##_failure_cbk(_frame, EINVAL); \ + break; \ + case OB_STATE_READY: \ + default_##_fop(_frame, _xl, ##_args); \ + break; \ + case OB_STATE_OPEN_TRIGGERED: { \ + call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ + ##_args); \ + if (__ob_stub != NULL) { \ + ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + default: \ + gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ + OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ + default_##_fop##_failure_cbk(_frame, -__ob_state) + +#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) 
\ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + if (!(_trigger)) { \ + fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ + (_fd)->flags); \ + if (__ob_fd != NULL) { \ + default_##_fop(_frame, _xl, ##_args); \ + fd_unref(__ob_fd); \ + break; \ + } \ + __ob_state = -ENOMEM; \ + } \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_fd( \ + _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ + break; \ + OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) 
\ + do { \ + ob_inode_t *__ob_inode; \ + fd_t *__first_fd; \ + ob_state_t __ob_state = ob_open_and_resume_inode( \ + _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ + switch (__ob_state) { \ + case OB_STATE_OPEN_PENDING: \ + OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ + } \ + } while (0) + +static ob_inode_t * +ob_inode_get_locked(xlator_t *this, inode_t *inode) { ob_inode_t *ob_inode = NULL; uint64_t value = 0; - int ret = 0; - if (!inode) - goto out; + if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { + return (ob_inode_t *)(uintptr_t)value; + } - LOCK(&inode->lock); - { - __inode_ctx_get(inode, this, &value); - if (value == 0) { - ob_inode = ob_inode_alloc(inode); - if (ob_inode == NULL) - goto unlock; - - value = (uint64_t)(uintptr_t)ob_inode; - ret = __inode_ctx_set(inode, this, &value); - if (ret < 0) { - ob_inode_free(ob_inode); - ob_inode = NULL; - } - } else { - ob_inode = (ob_inode_t *)(uintptr_t)value; + ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); + if (ob_inode != NULL) { + ob_inode->inode = inode; + INIT_LIST_HEAD(&ob_inode->resume_fops); + + value = (uint64_t)(uintptr_t)ob_inode; + if (__inode_ctx_set(inode, this, &value) < 0) { + GF_FREE(ob_inode); + ob_inode = NULL; } } -unlock: - UNLOCK(&inode->lock); -out: return ob_inode; } -ob_fd_t * -__ob_fd_ctx_get(xlator_t *this, fd_t *fd) +static ob_state_t +ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, + int32_t open_count, bool synchronous, bool trigger, + ob_inode_t **pob_inode, fd_t **pfd) { - uint64_t value = 0; - int ret = -1; - ob_fd_t *ob_fd = NULL; + ob_conf_t *conf; + ob_inode_t *ob_inode; + call_stub_t *open_stub; - ret = __fd_ctx_get(fd, this, &value); - if (ret) - return NULL; + if (inode == NULL) { + return OB_STATE_READY; + } - ob_fd = (void *)((long)value); + conf = xl->private; - return ob_fd; -} + *pfd = NULL; -ob_fd_t * -ob_fd_ctx_get(xlator_t *this, fd_t *fd) -{ - ob_fd_t *ob_fd = NULL; - - 
LOCK(&fd->lock); + LOCK(&inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - } - UNLOCK(&fd->lock); - - return ob_fd; -} + ob_inode = ob_inode_get_locked(xl, inode); + if (ob_inode == NULL) { + UNLOCK(&inode->lock); -int -__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - uint64_t value = 0; - int ret = -1; + return -ENOMEM; + } + *pob_inode = ob_inode; + + ob_inode->open_count += open_count; + + /* If first_fd is not NULL, it means that there's a previous open not + * yet completed. */ + if (ob_inode->first_fd != NULL) { + *pfd = ob_inode->first_fd; + /* If the current request doesn't trigger the open and it hasn't + * been triggered yet, we can continue without issuing the open + * only if the current request belongs to the same fd as the + * first one. */ + if (!trigger && !ob_inode->triggered && + (ob_inode->first_fd == fd)) { + UNLOCK(&inode->lock); + + return OB_STATE_OPEN_PENDING; + } - value = (long)((void *)ob_fd); + /* We need to issue the open. It could have already been triggered + * before. In this case open_stub will be NULL. Or the initial open + * may not be completely ready yet. In this case open_stub will be + * OB_OPEN_PREPARING. */ + open_stub = ob_inode->first_open; + ob_inode->first_open = NULL; + ob_inode->triggered = true; - ret = __fd_ctx_set(fd, this, value); + UNLOCK(&inode->lock); - return ret; -} + if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) { + call_resume(open_stub); + } -int -ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) -{ - int ret = -1; + return OB_STATE_OPEN_TRIGGERED; + } - LOCK(&fd->lock); - { - ret = __ob_fd_ctx_set(this, fd, ob_fd); - } - UNLOCK(&fd->lock); + /* There's no pending open. Only opens can be non synchronous, so all + * regular fops will be processed directly. For non synchronous opens, + * we'll still process them normally (i.e. synchronously) if there are + * more file descriptors open. 
*/ + if (synchronous || (ob_inode->open_count > open_count)) { + UNLOCK(&inode->lock); - return ret; -} + return OB_STATE_READY; + } -ob_fd_t * -ob_fd_new(void) -{ - ob_fd_t *ob_fd = NULL; + *pfd = fd; - ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t); + /* This is the first open. We keep a reference on the fd and set + * first_open stub to OB_OPEN_PREPARING until the actual stub can + * be assigned (we don't create the stub here to avoid doing memory + * allocations inside the mutex). */ + ob_inode->first_fd = __fd_ref(fd); + ob_inode->first_open = OB_OPEN_PREPARING; - INIT_LIST_HEAD(&ob_fd->list); - INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode); + /* If lazy_open is not set, we'll need to immediately send the open, + * so we set triggered right now. */ + ob_inode->triggered = !conf->lazy_open; + } + UNLOCK(&inode->lock); - return ob_fd; + return OB_STATE_FIRST_OPEN; } -void -ob_fd_free(ob_fd_t *ob_fd) +static ob_state_t +ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count, + bool synchronous, bool trigger, ob_inode_t **pob_inode, + fd_t **pfd) { - LOCK(&ob_fd->fd->inode->lock); - { - list_del_init(&ob_fd->ob_fds_on_inode); - } - UNLOCK(&ob_fd->fd->inode->lock); - - loc_wipe(&ob_fd->loc); - - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + uint64_t err; - if (ob_fd->open_frame) { - /* If we sill have a frame it means that background open has never - * been triggered. We need to release the pending reference. 
*/ - fd_unref(ob_fd->fd); - - STACK_DESTROY(ob_fd->open_frame->root); + if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) { + return (ob_state_t)-err; } - GF_FREE(ob_fd); + return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous, + trigger, pob_inode, pfd); } -int -ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd_ret, dict_t *xdata) +static ob_state_t +ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode, + fd_t **pfd) { - fd_t *fd = NULL; - int count = 0; - int ob_inode_op_ret = 0; - int ob_inode_op_errno = 0; - ob_fd_t *ob_fd = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t ob_inode_fops_waiting = _gf_false; - struct list_head fops_waiting_on_fd, fops_waiting_on_inode; + bool synchronous; - fd = frame->local; - frame->local = NULL; - - INIT_LIST_HEAD(&fops_waiting_on_fd); - INIT_LIST_HEAD(&fops_waiting_on_inode); + /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't + * we also execute this open synchronously ? */ + synchronous = (flags & O_TRUNC) != 0; - ob_inode = ob_inode_get(this, fd->inode); + return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); +} - LOCK(&fd->lock); +static int32_t +ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + LOCK(&ob_inode->inode->lock); { - ob_fd = __ob_fd_ctx_get(this, fd); - ob_fd->opened = _gf_true; - - ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; - - list_splice_init(&ob_fd->list, &fops_waiting_on_fd); - - if (op_ret < 0) { - /* mark fd BAD for ever */ - ob_fd->op_errno = op_errno; - ob_fd = NULL; /*shouldn't be freed*/ - } else { - __fd_ctx_del(fd, this, NULL); - } - } - UNLOCK(&fd->lock); - - if (ob_inode_fops_waiting) { - LOCK(&fd->inode->lock); - { - count = --ob_inode->count; - if (op_ret < 0) { - /* TODO: when to reset the error? 
*/ - ob_inode->op_ret = -1; - ob_inode->op_errno = op_errno; - } - - if (count == 0) { - ob_inode->open_in_progress = _gf_false; - ob_inode_op_ret = ob_inode->op_ret; - ob_inode_op_errno = ob_inode->op_errno; - list_splice_init(&ob_inode->resume_fops, - &fops_waiting_on_inode); - } + /* We only queue a stub if the open has not been completed or + * cancelled. */ + if (ob_inode->first_fd == fd) { + list_add_tail(&stub->list, &ob_inode->resume_fops); + stub = NULL; } - UNLOCK(&fd->inode->lock); - } - - if (ob_fd) - ob_fd_free(ob_fd); - - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) - { - list_del_init(&stub->list); - - if (op_ret < 0) - call_unwind_error(stub, -1, op_errno); - else - call_resume(stub); } + UNLOCK(&ob_inode->inode->lock); - list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) - { - list_del_init(&stub->list); - - if (ob_inode_op_ret < 0) - call_unwind_error(stub, -1, ob_inode_op_errno); - else - call_resume(stub); + if (stub != NULL) { + call_resume(stub); } - /* The background open is completed. We can release the 'fd' reference. 
*/ - fd_unref(fd); - - STACK_DESTROY(frame->root); - return 0; } -int -ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +static void +ob_open_destroy(call_stub_t *stub, fd_t *fd) { - call_frame_t *frame = NULL; + stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +} - if (ob_fd == NULL) { - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; +static int32_t +ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + call_stub_t *stub) +{ + bool closed; - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - } - unlock: - UNLOCK(&fd->lock); - } else { - LOCK(&fd->lock); - { - frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; + LOCK(&ob_inode->inode->lock); + { + closed = ob_inode->first_fd != fd; + if (!closed) { + if (ob_inode->triggered) { + ob_inode->first_open = NULL; + } else { + ob_inode->first_open = stub; + stub = NULL; + } } - UNLOCK(&fd->lock); } + UNLOCK(&ob_inode->inode->lock); - if (frame) { - /* We don't need to take a reference here. We already have a reference - * while the open is pending. 
*/ - frame->local = fd; - - STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, - ob_fd->xdata); + if (stub != NULL) { + if (closed) { + ob_open_destroy(stub, fd); + } else { + call_resume(stub); + } } return 0; } -void -ob_inode_wake(xlator_t *this, struct list_head *ob_fds) +static void +ob_resume_pending(struct list_head *list) { - ob_fd_t *ob_fd = NULL, *tmp = NULL; + call_stub_t *stub; - if (!list_empty(ob_fds)) { - list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) - { - ob_fd_wake(this, ob_fd->fd, ob_fd); - ob_fd_free(ob_fd); - } - } -} + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); -/* called holding inode->lock and fd->lock */ -void -ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) -{ - if (!src || !dst) - goto out; - - dst->fd = src->fd; - dst->loc.inode = inode_ref(src->loc.inode); - gf_uuid_copy(dst->loc.gfid, src->loc.gfid); - dst->flags = src->flags; - dst->xdata = dict_ref(src->xdata); - dst->ob_inode = src->ob_inode; -out: - return; + call_resume(stub); + } } -int -open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, - call_stub_t *stub) +static void +ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, + int32_t op_errno) { - ob_inode_t *ob_inode = NULL; - ob_fd_t *ob_fd = NULL, *tmp = NULL; - gf_boolean_t was_open_in_progress = _gf_false; - gf_boolean_t wait_for_open = _gf_false; - struct list_head ob_fds; + struct list_head list; - ob_inode = ob_inode_get(this, inode); - if (ob_inode == NULL) - goto out; + INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&ob_fds); + if (op_ret < 0) { + fd_ctx_set(fd, xl, op_errno <= 0 ? 
EIO : op_errno); + } - LOCK(&inode->lock); + LOCK(&ob_inode->inode->lock); { - was_open_in_progress = ob_inode->open_in_progress; - ob_inode->unlinked = 1; - - if (was_open_in_progress) { - list_add_tail(&stub->list, &ob_inode->resume_fops); - goto inode_unlock; - } - - list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) - { - LOCK(&ob_fd->fd->lock); - { - if (ob_fd->opened) - goto fd_unlock; - - ob_inode->count++; - ob_fd->ob_inode_fops_waiting = _gf_true; - - if (ob_fd->open_frame == NULL) { - /* open in progress no need of wake */ - } else { - tmp = ob_fd_new(); - tmp->open_frame = ob_fd->open_frame; - ob_fd->open_frame = NULL; - - ob_fd_copy(ob_fd, tmp); - list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); - } - } - fd_unlock: - UNLOCK(&ob_fd->fd->lock); - } - - if (ob_inode->count) { - wait_for_open = ob_inode->open_in_progress = _gf_true; - list_add_tail(&stub->list, &ob_inode->resume_fops); + /* Only update the fields if the file has not been closed before + * getting here. 
*/ + if (ob_inode->first_fd == fd) { + list_splice_init(&ob_inode->resume_fops, &list); + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; } } -inode_unlock: - UNLOCK(&inode->lock); + UNLOCK(&ob_inode->inode->lock); -out: - if (!was_open_in_progress) { - if (!wait_for_open) { - call_resume(stub); - } else { - ob_inode_wake(this, &ob_fds); - } - } + ob_resume_pending(&list); - return 0; + fd_unref(fd); } -int -open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) +static int32_t +ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int op_errno = 0; - - if (!fd) - goto nofd; - - LOCK(&fd->lock); - { - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) - goto unlock; + ob_inode_t *ob_inode; - if (ob_fd->op_errno) { - op_errno = ob_fd->op_errno; - goto unlock; - } + ob_inode = frame->local; + frame->local = NULL; - list_add_tail(&stub->list, &ob_fd->list); - } -unlock: - UNLOCK(&fd->lock); + ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); -nofd: - if (op_errno) - call_unwind_error(stub, -1, op_errno); - else if (ob_fd) - ob_fd_wake(this, fd, NULL); - else - call_resume(stub); + STACK_DESTROY(frame->root); return 0; } -int -ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, +static int32_t +ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, dict_t *xdata) { - ob_fd_t *ob_fd = NULL; - int ret = -1; - ob_conf_t *conf = NULL; - ob_inode_t *ob_inode = NULL; - gf_boolean_t open_in_progress = _gf_false; - int unlinked = 0; + STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - conf = this->private; + return 0; +} - if (flags & O_TRUNC) { - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; +static int32_t +ob_open(call_frame_t 
*frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) +{ + ob_inode_t *ob_inode; + call_frame_t *open_frame; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * or the current flags require a synchronous open. */ + return default_open(frame, this, loc, flags, fd, xdata); } - ob_inode = ob_inode_get(this, fd->inode); - - ob_fd = ob_fd_new(); - if (!ob_fd) - goto enomem; - - ob_fd->ob_inode = ob_inode; + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. */ + stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } - ob_fd->fd = fd; + state = -ENOMEM; + } - ob_fd->open_frame = copy_frame(frame); - if (!ob_fd->open_frame) - goto enomem; - ret = loc_copy(&ob_fd->loc, loc); - if (ret) - goto enomem; + if (state == OB_STATE_FIRST_OPEN) { + /* We try to create a stub for the new open. A new frame needs to be + * used because the current one may be destroyed soon after sending + * the open's reply. */ + open_frame = copy_frame(frame); + if (open_frame != NULL) { + stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, + xdata); + if (stub != NULL) { + open_frame->local = ob_inode; - ob_fd->flags = flags; - if (xdata) - ob_fd->xdata = dict_ref(xdata); + /* TODO: Previous version passed xdata back to the caller, but + * probably this doesn't make sense since it won't contain + * any requested data. I think it would be better to pass + * NULL for xdata. 
*/ + default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); - LOCK(&fd->inode->lock); - { - open_in_progress = ob_inode->open_in_progress; - unlinked = ob_inode->unlinked; - if (!open_in_progress && !unlinked) { - ret = ob_fd_ctx_set(this, fd, ob_fd); - if (ret) { - UNLOCK(&fd->inode->lock); - goto enomem; + return ob_open_dispatch(this, ob_inode, first_fd, stub); } - list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); + STACK_DESTROY(open_frame->root); } - } - UNLOCK(&fd->inode->lock); - /* We take a reference while the background open is pending or being - * processed. If we finally wind the request in the foreground, then - * ob_fd_free() will take care of this additional reference. */ - fd_ref(fd); + /* In case of error, simulate a regular completion but with an error + * code. */ + ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); - if (!open_in_progress && !unlinked) { - STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); - - if (!conf->lazy_open) - ob_fd_wake(this, fd, NULL); - } else { - ob_fd_free(ob_fd); - STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + state = -ENOMEM; } - return 0; -enomem: - if (ob_fd) { - if (ob_fd->open_frame) - STACK_DESTROY(ob_fd->open_frame->root); - - loc_wipe(&ob_fd->loc); - if (ob_fd->xdata) - dict_unref(ob_fd->xdata); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. 
*/ - GF_FREE(ob_fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); - return -1; + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "open", "path=%s", loc->path, NULL); + + return default_open_failure_cbk(frame, -state); } -int -ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, - dict_t *xdata) +static int32_t +ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - fd_t *old_fd = NULL; - int ret = -1; - int op_errno = ENOMEM; - call_stub_t *stub = NULL; - - old_fd = fd_lookup(fd->inode, 0); - if (old_fd) { - /* open-behind only when this is the first FD */ - stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); - if (!stub) { - fd_unref(old_fd); - goto err; - } - - open_and_resume(this, old_fd, stub); - - fd_unref(old_fd); - - return 0; - } - - ret = ob_open_behind(frame, this, loc, flags, fd, xdata); - if (ret) { - goto err; + ob_inode_t *ob_inode; + call_stub_t *stub; + fd_t *first_fd; + ob_state_t state; + + /* Create requests are never delayed. We always send them synchronously. */ + state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, + &first_fd); + if (state == OB_STATE_READY) { + /* There's no pending open, but there are other file descriptors opened + * so we simply forward the request synchronously. */ + return default_create(frame, this, loc, flags, mode, umask, fd, xdata); } - return 0; -err: - gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", - loc->path); - - STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); - - return 0; -} + if (state == OB_STATE_OPEN_TRIGGERED) { + /* The first open is in progress (either because it was already issued + * or because this request triggered it). We try to create a new stub + * to retry the operation once the initial open completes. 
*/ + stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, + xdata); + if (stub != NULL) { + return ob_stub_dispatch(this, ob_inode, first_fd, stub); + } -fd_t * -ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) -{ - fd_t *wind_fd = NULL; - ob_fd_t *ob_fd = NULL; - ob_conf_t *conf = NULL; + state = -ENOMEM; + } - conf = this->private; + /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never + * be returned by ob_open_and_resume_fd(). If we are here it can only be + * because there has been a problem. */ - ob_fd = ob_fd_ctx_get(this, fd); + /* In case of failure we need to decrement the number of open files because + * ob_fdclose() won't be called. */ - if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { - wind_fd = fd_anonymous(fd->inode); - if ((ob_fd->flags & O_DIRECT) && (flag)) - *flag = *flag | O_DIRECT; - } else { - wind_fd = fd_ref(fd); + LOCK(&fd->inode->lock); + { + ob_inode->open_count--; } + UNLOCK(&fd->inode->lock); + + gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", + "create", "path=%s", loc->path, NULL); - return wind_fd; + return default_create_failure_cbk(frame, -state); } -int +static int32_t ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - ob_conf_t *conf = NULL; - - conf = this->private; + ob_conf_t *conf = this->private; + bool trigger = conf->read_after_open || !conf->use_anonymous_fd; - if (!conf->read_after_open) - wind_fd = ob_get_wind_fd(this, fd, &flags); - else - wind_fd = fd_ref(fd); - - stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, - flags, xdata); - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); return 0; } -int 
+static int32_t ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, int count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, - flags, iobref, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, + iobref, xdata); return 0; } -int +static int32_t ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); - - stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); return 0; } -int +static int32_t ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, gf_seek_what_t what, dict_t *xdata) { - call_stub_t *stub = NULL; - fd_t *wind_fd = NULL; - - wind_fd = ob_get_wind_fd(this, fd, NULL); + ob_conf_t *conf = this->private; + bool trigger = !conf->use_anonymous_fd; - stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, - xdata); - - fd_unref(wind_fd); - - if (!stub) - goto err; - - open_and_resume(this, wind_fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); return 0; } -int +static int32_t ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - ob_fd_t *ob_fd = NULL; - gf_boolean_t unwind = _gf_false; - - LOCK(&fd->lock); - { - ob_fd = 
__ob_fd_ctx_get(this, fd); - if (ob_fd && ob_fd->open_frame) - /* if open() was never wound to backend, - no need to wind flush() either. - */ - unwind = _gf_true; - } - UNLOCK(&fd->lock); - - if (unwind) - goto unwind; - - stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); - - return 0; - -unwind: - STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); + OB_POST_FLUSH(this, frame, fd, fd, xdata); return 0; } -int +static int32_t ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); return 0; } -int +static int32_t ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); return 0; } -int +static int32_t ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, int flags, dict_t *xdata) { - call_stub_t *stub = 
NULL; - - stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); return 0; } -int +static int32_t ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); + OB_POST_FD(fremovexattr, this, frame, fd, true, fd, name, xdata); return 0; } -int +static int32_t ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata) { - call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, - volume, fd, cmd, flock, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); return 0; } -int +static int32_t ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - call_stub_t *stub = fop_fentrylk_stub( - frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); - if (stub) - 
open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, + xdata); return 0; } -int +static int32_t ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, - optype, xattr, xdata); - if (stub) - open_and_resume(this, fd, stub); - else - STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); return 0; } -int +static int32_t ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, int valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); return 0; } -int +static int32_t ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; + OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); - stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, - len, xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, - xdata); - if (!stub) - goto err; - - open_and_resume(this, fd, stub); + OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); return 0; -err: - 
STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; } -int +static int32_t ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata) { - call_stub_t *stub; - - stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, - xdata); - if (!stub) - goto err; + OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); - open_and_resume(this, fd, stub); - - return 0; -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } -int +static int32_t ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); + OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); return 0; } -int +static int32_t ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, dst->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); + OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); return 0; } -int32_t +static int32_t ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - call_stub_t *stub = NULL; - - stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, - xdata); - if (!stub) - goto err; + OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, + xdata); - open_all_pending_fds_and_resume(this, loc->inode, stub); - - return 0; -err: - STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); 
return 0; } -int32_t +static int32_t ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - call_stub_t *stub = NULL; - gf_boolean_t access_xattr = _gf_false; - if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || dict_get(dict, POSIX_ACL_ACCESS_XATTR) || - dict_get(dict, GF_SELINUX_XATTR_KEY)) - access_xattr = _gf_true; - - if (!access_xattr) + dict_get(dict, GF_SELINUX_XATTR_KEY)) { return default_setxattr(frame, this, loc, dict, flags, xdata); + } - stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, - xdata); - if (!stub) - goto err; - - open_all_pending_fds_and_resume(this, loc->inode, stub); + OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, + xdata); return 0; -err: - STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); - return 0; } -int -ob_release(xlator_t *this, fd_t *fd) +static void +ob_fdclose(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; + struct list_head list; + ob_inode_t *ob_inode; + call_stub_t *stub; + + INIT_LIST_HEAD(&list); + stub = NULL; - ob_fd = ob_fd_ctx_get(this, fd); + LOCK(&fd->inode->lock); + { + ob_inode = ob_inode_get_locked(this, fd->inode); + if (ob_inode != NULL) { + ob_inode->open_count--; + + /* If this fd is the same as ob_inode->first_fd, it means that + * the initial open has not fully completed. We'll try to cancel + * it. */ + if (ob_inode->first_fd == fd) { + if (ob_inode->first_open == OB_OPEN_PREPARING) { + /* In this case ob_open_dispatch() has not been called yet. + * We clear first_fd and first_open to allow that function + * to know that the open is not really needed. This also + * allows other requests to work as expected if they + * arrive before the dispatch function is called. If there + * are pending fops, we can directly process them here. + * (note that there shouldn't be any fd related fops, but + * if there are, it's fine if they fail). 
*/ + ob_inode->first_fd = NULL; + ob_inode->first_open = NULL; + ob_inode->triggered = false; + list_splice_init(&ob_inode->resume_fops, &list); + } else if (!ob_inode->triggered) { + /* If the open has already been dispatched, we can only + * cancel it if it has not been triggered. Otherwise we + * simply wait until it completes. While it's not triggered, + * first_open must be a valid stub and there can't be any + * pending fops. */ + GF_ASSERT((ob_inode->first_open != NULL) && + list_empty(&ob_inode->resume_fops)); + + ob_inode->first_fd = NULL; + stub = ob_inode->first_open; + ob_inode->first_open = NULL; + } + } + } + } + UNLOCK(&fd->inode->lock); - ob_fd_free(ob_fd); + if (stub != NULL) { + ob_open_destroy(stub, fd); + } - return 0; + ob_resume_pending(&list); } int ob_forget(xlator_t *this, inode_t *inode) { - ob_inode_t *ob_inode = NULL; + ob_inode_t *ob_inode; uint64_t value = 0; - inode_ctx_del(inode, this, &value); - - if (value) { + if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { ob_inode = (ob_inode_t *)(uintptr_t)value; - ob_inode_free(ob_inode); + GF_FREE(ob_inode); } return 0; @@ -1153,20 +880,18 @@ ob_priv_dump(xlator_t *this) int ob_fdctx_dump(xlator_t *this, fd_t *fd) { - ob_fd_t *ob_fd = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - int ret = 0; + uint64_t value = 0; + int ret = 0, error = 0; ret = TRY_LOCK(&fd->lock); if (ret) return 0; - ob_fd = __ob_fd_ctx_get(this, fd); - if (!ob_fd) { - UNLOCK(&fd->lock); - return 0; + if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { + error = (int32_t)value; } gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", @@ -1175,17 +900,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) gf_proc_dump_write("fd", "%p", fd); - gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); - - if (ob_fd->open_frame) - gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, - ob_fd->open_frame->root->unique); - - gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); - - 
gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); - - gf_proc_dump_write("flags", "%d", ob_fd->flags); + gf_proc_dump_write("error", "%d", error); UNLOCK(&fd->lock); @@ -1282,6 +997,7 @@ fini(xlator_t *this) struct xlator_fops fops = { .open = ob_open, + .create = ob_create, .readv = ob_readv, .writev = ob_writev, .flush = ob_flush, @@ -1307,7 +1023,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { - .release = ob_release, + .fdclose = ob_fdclose, .forget = ob_forget, }; diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 4f16d148262..7fe4b3c3a4b 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -421,9 +421,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_private_t *priv = NULL; qr_inode_table_t *table = NULL; uint32_t rollover = 0; - struct timeval tv = { - 0, - }; rollover = gen >> 32; gen = gen & 0xffffffff; @@ -431,7 +428,6 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, priv = this->private; table = &priv->table; - gettimeofday(&tv, NULL); LOCK(&table->lock); { if ((rollover != qr_inode->gen_rollover) || @@ -453,8 +449,7 @@ qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; qr_inode->buf = *buf; - - memcpy(&qr_inode->last_refresh, &tv, sizeof(struct timeval)); + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } @@ -524,9 +519,7 @@ __qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { qr_inode->buf = *buf; - - gettimeofday(&qr_inode->last_refresh, NULL); - + qr_inode->last_refresh = gf_time(); __qr_inode_register(this, table, qr_inode); } else { __qr_inode_prune(this, table, qr_inode, gen); @@ -558,20 +551,14 @@ __qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) 
{ qr_conf_t *conf = NULL; qr_private_t *priv = NULL; - struct timeval now; - struct timeval diff; priv = this->private; conf = &priv->conf; - gettimeofday(&now, NULL); - - timersub(&now, &qr_inode->last_refresh, &diff); - - if (qr_inode->last_refresh.tv_sec < priv->last_child_down) + if (qr_inode->last_refresh < priv->last_child_down) return _gf_false; - if (diff.tv_sec >= conf->cache_timeout) + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) return _gf_false; return _gf_true; @@ -1034,7 +1021,7 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) char key_prefix[GF_DUMP_MAX_BUF_LEN] = { 0, }; - char buf[256] = { + char buf[GF_TIMESTR_SIZE] = { 0, }; @@ -1049,12 +1036,8 @@ qr_inodectx_dump(xlator_t *this, inode_t *inode) gf_proc_dump_write("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); - if (qr_inode->last_refresh.tv_sec) { - gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh.tv_sec, - gf_timefmt_FT); - snprintf(buf + strlen(buf), sizeof buf - strlen(buf), - ".%" GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec); - + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); gf_proc_dump_write("last-cache-validation-time", "%s", buf); } @@ -1407,7 +1390,7 @@ qr_init(xlator_t *this) ret = 0; - time(&priv->last_child_down); + priv->last_child_down = gf_time(); GF_ATOMIC_INIT(priv->generation, 0); this->private = priv; out: @@ -1457,7 +1440,7 @@ qr_conf_destroy(qr_conf_t *conf) } void -qr_update_child_down_time(xlator_t *this, time_t *now) +qr_update_child_down_time(xlator_t *this, time_t now) { qr_private_t *priv = NULL; @@ -1465,7 +1448,7 @@ qr_update_child_down_time(xlator_t *this, time_t *now) LOCK(&priv->lock); { - priv->last_child_down = *now; + priv->last_child_down = now; } UNLOCK(&priv->lock); } @@ -1511,7 +1494,6 @@ qr_notify(xlator_t *this, int event, void *data, ...) 
{ int ret = 0; qr_private_t *priv = NULL; - time_t now = 0; qr_conf_t *conf = NULL; priv = this->private; @@ -1520,8 +1502,7 @@ qr_notify(xlator_t *this, int event, void *data, ...) switch (event) { case GF_EVENT_CHILD_DOWN: case GF_EVENT_SOME_DESCENDENT_DOWN: - time(&now); - qr_update_child_down_time(this, &now); + qr_update_child_down_time(this, gf_time()); break; case GF_EVENT_UPCALL: if (conf->qr_invalidation) diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index 67850821b8e..20fcc70b3a7 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -39,7 +39,7 @@ struct qr_inode { uint32_t ia_ctime_nsec; uint32_t gen_rollover; struct iatt buf; - struct timeval last_refresh; + time_t last_refresh; struct list_head lru; uint64_t gen; uint64_t invalidation_time; diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index e1d0e9aaf00..00cfca016e6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -2489,7 +2489,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_create(this, directory); - if (!wb_directory_inode || !wb_directory_inode->lock.spinlock) + if (!wb_directory_inode) return; LOCK(&wb_directory_inode->lock); @@ -2509,7 +2509,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory) wb_directory_inode = wb_inode_ctx_get(this, directory); - if (!wb_directory_inode || !wb_directory_inode->lock.spinlock) + if (!wb_directory_inode) return; LOCK(&wb_directory_inode->lock); diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am index 01edbd35d9c..5e875c8df0b 100644 --- a/xlators/protocol/server/src/Makefile.am +++ b/xlators/protocol/server/src/Makefile.am @@ -4,11 +4,11 @@ endif xlatordir = 
$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol -server_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS) +server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ - $(top_builddir)/rpc/xdr/src/libgfxdr.la + $(top_builddir)/rpc/xdr/src/libgfxdr.la $(LIB_DL) server_la_SOURCES = server.c server-resolve.c server-helpers.c \ server-rpc-fops.c server-handshake.c authenticate.c \ diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c index 4bb84042a9e..cd79cf4d930 100644 --- a/xlators/protocol/server/src/server-common.c +++ b/xlators/protocol/server/src/server-common.c @@ -828,7 +828,7 @@ server4_post_lease(gfx_lease_rsp *rsp, struct gf_lease *lease) void server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) + struct iatt *postparent) { inode_t *link_inode = NULL; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 5b76b49cc82..721968004a0 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -267,6 +267,8 @@ server_priv(xlator_t *this) gf_proc_dump_build_key(key, "server", "total-bytes-write"); gf_proc_dump_write(key, "%" PRIu64, total_write); + rpcsvc_statedump(conf->rpc); + ret = 0; out: if (ret) @@ -407,6 +409,7 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) if (!arg->victim_name) { gf_smsg(this->name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY, "Memory allocation is failed"); + free(arg); return; } diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 1fc0eae2056..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -140,6 +140,7 @@ posix_notify(xlator_t *this, 
int32_t event, void *data, ...) struct timespec sleep_till = { 0, }; + glusterfs_ctx_t *ctx = this->ctx; switch (event) { case GF_EVENT_PARENT_UP: { @@ -150,8 +151,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) case GF_EVENT_PARENT_DOWN: { if (!victim->cleanup_starting) break; - gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", - victim->name); if (priv->janitor) { pthread_mutex_lock(&priv->janitor_mutex); @@ -160,7 +159,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); if (!ret) { - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; /* Wait to set janitor_task flag to _gf_false by * janitor_task_done */ @@ -168,7 +167,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) (void)pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_mutex, &sleep_till); - clock_gettime(CLOCK_REALTIME, &sleep_till); + timespec_now_realtime(&sleep_till); sleep_till.tv_sec += 1; } } @@ -177,6 +176,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) 
GF_FREE(priv->janitor); } priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); } break; default: @@ -611,6 +620,7 @@ posix_init(xlator_t *this) 0, }; int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -672,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, "mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -1088,7 +1093,13 @@ posix_init(xlator_t *this) pthread_cond_init(&_private->fsync_cond, NULL); pthread_mutex_init(&_private->janitor_mutex, NULL); pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) + goto out; + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, "posixfsy"); if (ret) { @@ -1201,6 +1212,8 @@ posix_fini(xlator_t *this) { struct posix_private *priv = this->private; gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; int ret = 0; int 
i = 0; @@ -1247,6 +1260,19 @@ posix_fini(xlator_t *this) priv->janitor = NULL; } + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; @@ -1440,24 +1466,21 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1465,8 +1488,7 @@ struct volume_options posix_options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index c3a20f4c85c..8cc3ccf8c00 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -198,6 +198,19 @@ posix_lookup(call_frame_t 
*frame, xlator_t *this, loc_t *loc, dict_t *xdata) goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on .attribute," + " which is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif /* __NetBSD__ */ + op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless); op_ret = -1; if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { @@ -650,6 +663,19 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; VALIDATE_OR_GOTO(priv, out); GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, @@ -1416,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 372df89807c..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -1070,7 +1070,7 @@ verify_handle: ret = posix_handle_soft(this, path, loc, uuid_curr, 
&stat); out: - if (!(*op_errno)) + if (ret && !(*op_errno)) *op_errno = errno; return ret; } @@ -1505,7 +1505,7 @@ posix_janitor_task(void *data) if (!priv) goto out; - time(&now); + now = gf_time(); if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { if (priv->disable_landfill_purge) { gf_msg_debug(this->name, 0, @@ -1592,16 +1592,108 @@ unlock: return; } +static struct posix_fd * +janitor_get_next_fd(glusterfs_ctx_t *ctx) +{ + struct posix_fd *pfd = NULL; + + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; + } + + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } + + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); + + return pfd; +} + +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) +{ + THIS = xl; + + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); + } + + GF_FREE(pfd); +} + +static void * +posix_ctx_janitor_thread_proc(void *data) +{ + xlator_t *xl; + struct posix_fd *pfd; + glusterfs_ctx_t *ctx = NULL; + struct posix_private *priv_fd; + + ctx = data; + + pthread_mutex_lock(&ctx->fd_lock); + + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); + } + + pthread_mutex_unlock(&ctx->fd_lock); + + return NULL; +} + +int +posix_spawn_ctx_janitor_thread(xlator_t *this) +{ + int ret = 0; + glusterfs_ctx_t *ctx = NULL; + + ctx = this->ctx; + + pthread_mutex_lock(&ctx->fd_lock); + { + if (ctx->pxl_count++ == 0) { + ret = gf_thread_create(&ctx->janitor, NULL, + posix_ctx_janitor_thread_proc, ctx, + "posixctxjan"); + + if (ret) { + 
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor thread failed"); + ctx->pxl_count--; + } + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + return ret; +} + static int -is_fresh_file(int64_t sec, int64_t ns) +is_fresh_file(struct timespec *ts) { - struct timeval tv; + struct timespec now; int64_t elapsed; - gettimeofday(&tv, NULL); + timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - elapsed = (tv.tv_sec - sec) * 1000000L; - elapsed += tv.tv_usec - (ns / 1000L); if (elapsed < 0) { /* The file has been modified in the future !!! * Is it fresh ? previous implementation considered this as a @@ -1610,11 +1702,7 @@ is_fresh_file(int64_t sec, int64_t ns) } /* If the file is newer than a second, we consider it fresh. */ - if (elapsed < 1000000) { - return 1; - } - - return 0; + return elapsed < 1000000; } int @@ -1677,7 +1765,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. */ - if (is_fresh_file(stbuf.ia_ctime, stbuf.ia_ctime_nsec)) { + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1691,7 +1781,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) if (ret != 16) { /* TODO: This is a very hacky way of doing this, and very prone to * errors and unexpected behavior. This should be changed. 
*/ - if (is_fresh_file(stat.st_ctim.tv_sec, stat.st_ctim.tv_nsec)) { + if (is_fresh_file(&stat.st_ctim)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1924,7 +2014,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) { struct posix_private *priv = NULL; int ret = -1; - char timestamp[256] = { + char timestamp[GF_TIMESTR_SIZE] = { 0, }; int fd = -1; @@ -1939,9 +2029,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) int timeout = 0; struct aiocb aiocb; - GF_VALIDATE_OR_GOTO(this->name, this, out); priv = this->private; - GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); timeout = priv->health_check_timeout; @@ -1952,7 +2040,7 @@ posix_fs_health_check(xlator_t *this, char *file_path) goto out; } - time_sec = time(NULL); + time_sec = gf_time(); gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); timelen = strlen(timestamp); @@ -2224,7 +2312,7 @@ posix_disk_space_check(xlator_t *this) double totsz = 0; double freesz = 0; - GF_VALIDATE_OR_GOTO(this->name, this, out); + GF_VALIDATE_OR_GOTO("posix-helpers", this, out); priv = this->private; GF_VALIDATE_OR_GOTO(this->name, priv, out); @@ -2317,7 +2405,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl) ret = gf_thread_create(&priv->disk_space_check, NULL, posix_disk_space_check_thread_proc, xl, - "posix_reserve"); + "posixrsv"); if (ret) { priv->disk_space_check_active = _gf_false; gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, @@ -2397,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head) stub = list_entry(head->prev, call_stub_t, list); ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL); - if (ret) - return; - -#ifdef GF_LINUX_HOST_OS - /* syncfs() is not "declared" in RHEL's glibc even though - the kernel has support. 
- */ -#include <sys/syscall.h> -#include <unistd.h> -#ifdef SYS_syncfs - syscall(SYS_syncfs, pfd->fd); -#else - sync(); -#endif -#else - sync(); -#endif + if (!ret) + (void)gf_syncfs(pfd->fd); } void * @@ -3505,18 +3578,21 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) gf_boolean_t have_val = _gf_false; data_t *arg_data = NULL; char *xattr_name = NULL; + size_t xattr_len = 0; gf_boolean_t is_stale = _gf_false; op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); if (xattr_name == NULL) { op_ret = 0; - goto out; + return is_stale; } - arg_data = dict_get(xdata, xattr_name); + xattr_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_len); if (!arg_data) { op_ret = 0; - goto out; + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + return is_stale; } size = sys_lgetxattr(par_path, xattr_name, value_buf, @@ -3560,7 +3636,7 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) } out: - dict_del_sizen(xdata, xattr_name); + dict_deln(xdata, xattr_name, xattr_len); dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); if (op_ret == -1) { @@ -3569,3 +3645,22 @@ out: return is_stale; } + +/* Delete user xattr from the file at the file-path specified by data and from + * dict */ +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) +{ + int ret; + char *real_path = data; + + ret = sys_lremovexattr(real_path, k); + if (ret) { + gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. 
key %s path %s", k, real_path); + } + + dict_del(dict, k); + + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 439a4362fc6..6d54d37e5aa 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -54,6 +54,7 @@ #include <glusterfs/events.h> #include "posix-gfid-path.h" #include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -1360,6 +1361,22 @@ out: return 0; } +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + int32_t posix_releasedir(xlator_t *this, fd_t *fd) { @@ -1382,11 +1399,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) "pfd->dir is NULL for fd=%p", fd); goto out; } - - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_closedir(pfd->dir); - GF_FREE(pfd); + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -2294,8 +2307,7 @@ posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, flags); if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; + op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, "copy_file_range failed: fd_in: %p (gfid: %s) ," " fd_out %p (gfid:%s)", @@ -2510,7 +2522,6 @@ out: int32_t posix_release(xlator_t *this, fd_t *fd) { - struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; @@ -2518,8 +2529,6 @@ posix_release(xlator_t *this, fd_t *fd) VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); - priv = this->private; - ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { 
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, @@ -2533,13 +2542,7 @@ posix_release(xlator_t *this, fd_t *fd) "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } - gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); - - sys_close(pfd->fd); - GF_FREE(pfd); - - if (!priv) - goto out; + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -2709,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t ret = 0; ssize_t acl_size = 0; dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; posix_xattr_filler_t filler = { 0, }; @@ -2724,6 +2728,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, struct mdata_iatt mdata_iatt = { 0, }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); @@ -2906,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if (sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + xattr = dict_new(); if (!xattr) goto out; @@ -3013,6 +3081,9 @@ out: if (xattr) dict_unref(xattr); + if (subvol_xattrs) + dict_unref(subvol_xattrs); + return 0; } diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 662b5c69f8a..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -125,7 +125,7 @@ struct posix_fd { off_t dir_eof; /* offset at dir EOF */ struct list_head list; /* to add to the janitor list */ int odirect; - + xlator_t *xl; char _pad[4]; /* manual padding */ }; @@ -137,10 +137,6 @@ struct posix_private { gf_lock_t lock; char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; time_t last_landfill_check; @@ -170,6 +166,7 @@ struct posix_private { pthread_cond_t fsync_cond; pthread_mutex_t janitor_mutex; pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; int fsync_queue_count; int32_t janitor_sleep_duration; @@ -254,8 +251,7 @@ struct posix_private { gf_boolean_t aio_configured; gf_boolean_t aio_init_done; gf_boolean_t aio_capable; - - char _pad[4]; /* manual padding */ + uint32_t rel_fdcount; }; typedef struct { @@ -662,10 +658,16 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, int posix_check_dev_file(xlator_t *this, inode_t 
*inode, char *fop, int *op_errno); +int +posix_spawn_ctx_janitor_thread(xlator_t *this); + void posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); gf_boolean_t posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); + #endif /* _POSIX_H */ diff --git a/xlators/system/posix-acl/src/posix-acl.c b/xlators/system/posix-acl/src/posix-acl.c index 77c8df5a54f..fc227364b31 100644 --- a/xlators/system/posix-acl/src/posix-acl.c +++ b/xlators/system/posix-acl/src/posix-acl.c @@ -50,8 +50,8 @@ r00t() return conf->super_uid; } -int -whitelisted_xattr(const char *key) +static int +allowed_xattr(const char *key) { if (!key) return 0; @@ -2016,7 +2016,7 @@ int posix_acl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - if (whitelisted_xattr(name)) + if (allowed_xattr(name)) goto green; if (acl_permits(frame, loc->inode, POSIX_ACL_READ)) @@ -2039,7 +2039,7 @@ int posix_acl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - if (whitelisted_xattr(name)) + if (allowed_xattr(name)) goto green; if (acl_permits(frame, fd->inode, POSIX_ACL_READ)) @@ -2072,7 +2072,7 @@ posix_acl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, goto red; } - if (whitelisted_xattr(name)) { + if (allowed_xattr(name)) { if (!frame_is_user(frame, ctx->uid)) { op_errno = EPERM; goto red; |