Diffstat (limited to 'xlators/cluster')
43 files changed, 2722 insertions, 2473 deletions
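Most of the client-facing hunks below funnel through one new helper, afr_is_private_directory(), which hides AFR's internal root-level names (the legacy /.landfill trash directory and the new per-volume anonymous-inode directory) from ordinary clients while still exposing them to geo-rep and the self-heal daemons. A minimal caller sketch, patterned on the afr_lookup() hunk in this diff (an illustration only, not an additional change in the patch):

    /* Deny client access to AFR-private names directly under the root. */
    if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
                                 frame->root->pid)) {
        op_errno = EPERM;
        goto out;
    }

afr_readdir_transform_entries() and afr_selfheal_entry_dirent() apply the same predicate to filter directory listings and entry self-heal crawls.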
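The anonymous-inode directory itself is named deterministically from the volume id, so every client derives the same hidden name and gfid without coordination. A condensed sketch of the derivation in afr_handle_anon_inode_options() near the end of this diff, using the same field names as the patch:

    /* The hidden directory is "<AFR_ANON_DIR_PREFIX>-<volume-id>"; its gfid is
     * the parsed volume id with one bit flipped so the two never collide. */
    snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
             AFR_ANON_DIR_PREFIX, volfile_id_str);
    gf_uuid_parse(volfile_id_str, anon_inode_gfid);
    anon_inode_gfid[0] ^= 1;
    uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);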
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a2f0b2ad1c2..032ab5c8001 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,7 +45,42 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } -static void +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + +void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) { @@ -885,7 +920,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, metadatamap |= (1 << index); } if (metadatamap_old != metadatamap) { - event = 0; + __afr_inode_need_refresh_set(inode, this); } break; @@ -898,7 +933,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, datamap |= (1 << index); } if (datamap_old != datamap) - event = 0; + __afr_inode_need_refresh_set(inode, this); break; default: @@ -1062,34 +1097,6 @@ out: } int -__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) -{ - int ret = -1; - uint16_t datamap = 0; - uint16_t metadatamap = 0; - uint32_t event = 0; - uint64_t val = 0; - afr_inode_ctx_t *ctx = NULL; - - ret = __afr_inode_ctx_get(this, inode, &ctx); - if (ret) - return ret; - - val = ctx->read_subvol; - - metadatamap = (val & 0x000000000000ffff) >> 0; - datamap = (val & 0x00000000ffff0000) >> 16; - event = 0; - - val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | - (((uint64_t)event) << 32); - - ctx->read_subvol = val; - - return ret; -} - -int __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int *event_p) { @@ -1160,22 +1167,6 @@ out: } int -__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) -{ - afr_private_t *priv = NULL; - int ret = -1; - - priv = this->private; - - if (priv->child_count <= 16) - ret = __afr_inode_event_gen_reset_small(inode, this); - else - ret = -1; - - return ret; -} - -int afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int *event_p) { @@ -1241,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this, return 0; } -int +static int afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, int *spb_choice) { int ret = -1; - GF_VALIDATE_OR_GOTO(this->name, inode, out); LOCK(&inode->lock); @@ -1258,6 +1248,40 @@ out: return ret; } +/* + * frame is used to get the favourite policy. Since + * afr_inode_split_brain_choice_get is called from afr_open, it is possible to + * have a frame without local->replies. So in that case, frame is passed as + * NULL, hence this function will handle the frame NULL case. 
+ */ +int +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol) +{ + int ret = -1; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("afr", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out); + + priv = this->private; + + ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol); + if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) { + local = frame->local; + *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode, + NULL); + if (*spb_subvol >= 0) { + ret = 0; + } + } + +out: + return ret; +} int afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data, unsigned char *metadata, int event) @@ -1324,30 +1348,22 @@ out: return need_refresh; } -static int -afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) +int +__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) { int ret = -1; afr_inode_ctx_t *ctx = NULL; - GF_VALIDATE_OR_GOTO(this->name, inode, out); - - LOCK(&inode->lock); - { - ret = __afr_inode_ctx_get(this, inode, &ctx); - if (ret) - goto unlock; - + ret = __afr_inode_ctx_get(this, inode, &ctx); + if (ret == 0) { ctx->need_refresh = _gf_true; } -unlock: - UNLOCK(&inode->lock); -out: + return ret; } int -afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) +afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) { int ret = -1; @@ -1355,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) LOCK(&inode->lock); { - ret = __afr_inode_event_gen_reset(inode, this); + ret = __afr_inode_need_refresh_set(inode, this); } UNLOCK(&inode->lock); out: @@ -1790,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) ret = afr_inode_get_readable(frame, inode, this, local->readable, &event_generation, local->transaction.type); - if (ret == -EIO || (local->is_read_txn && !event_generation)) { + if (ret == -EIO) { /* No readable subvolume even after refresh ==> splitbrain.*/ if (!priv->fav_child_policy) { err = EIO; @@ -2290,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv, * need is a low probability that multiple clients * won't converge on the same subvolume. 
*/ + gf_uuid_copy(gfid_copy, args->gfid); pid = getpid(); - memcpy(gfid_copy, &pid, sizeof(pid)); + *(pid_t *)gfid_copy ^= pid; } child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) % priv->child_count; @@ -2875,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = -1; + int spb_subvol = -1; int child_count = -1; if (*read_subvol != -1) @@ -2885,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, local = frame->local; child_count = priv->child_count; - afr_inode_split_brain_choice_get(local->inode, this, &spb_choice); - if ((spb_choice >= 0) && + afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol); + if ((spb_subvol >= 0) && (AFR_COUNT(success_replies, child_count) == child_count)) { - *read_subvol = spb_choice; + *read_subvol = spb_subvol; } else if (!priv->quorum_count || frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { *read_subvol = afr_first_up_child(frame, this); @@ -2929,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) 0, }; gf_boolean_t locked_entry = _gf_false; + gf_boolean_t in_flight_create = _gf_false; gf_boolean_t can_interpret = _gf_true; inode_t *parent = NULL; ia_type_t ia_type = IA_INVAL; @@ -2972,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (!replies[i].valid) continue; - if (locked_entry && replies[i].op_ret == -1 && - replies[i].op_errno == ENOENT) { - /* Second, check entry is still - "underway" in creation */ - local->op_ret = -1; - local->op_errno = ENOENT; - goto error; - } - - if (replies[i].op_ret == -1) + if (replies[i].op_ret == -1) { + if (locked_entry && replies[i].op_errno == ENOENT) { + in_flight_create = _gf_true; + } continue; + } if (read_subvol == -1 || !readable[read_subvol]) { read_subvol = i; @@ -2992,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) } } + if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { + local->op_ret = -1; + local->op_errno = ENOENT; + goto error; + } + if (read_subvol == -1) goto error; /* We now have a read_subvol, which is readable[] (if there @@ -3050,7 +3069,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (read_subvol == -1) goto cant_interpret; if (ret) { - afr_inode_event_gen_reset(local->inode, this); + afr_inode_need_refresh_set(local->inode, this); dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY); } } else { @@ -3103,7 +3122,7 @@ error: * others in that they must be given higher priority while * returning to the user. 
* - * The hierarchy is ENODATA > ENOENT > ESTALE > others + * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others */ int @@ -3115,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) return ENOENT; if (old_errno == ESTALE || new_errno == ESTALE) return ESTALE; + if (old_errno == ENOSPC || new_errno == ENOSPC) + return ENOSPC; return new_errno; } @@ -3606,6 +3627,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; afr_local_t *local = NULL; int read_subvol = -1; + int ret = 0; unsigned char *data_readable = NULL; unsigned char *success_replies = NULL; @@ -3627,7 +3649,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) if (!afr_has_quorum(success_replies, this, frame)) goto unwind; - afr_replies_interpret(frame, this, local->inode, NULL); + ret = afr_replies_interpret(frame, this, local->inode, NULL); + if (ret) { + afr_inode_need_refresh_set(local->inode, this); + } read_subvol = afr_read_subvol_decide(local->inode, this, NULL, data_readable); @@ -3679,7 +3704,7 @@ afr_ta_id_file_check(void *opaque) this = opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -3888,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd(loc->inode, this, event, - local->event_generation)) - afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); - else - afr_discover_do(frame, this, 0); + afr_discover_do(frame, this, 0); return 0; out: @@ -3993,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -4033,11 +4053,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); - if (afr_is_inode_refresh_reqd(loc->inode, this, event, - local->event_generation)) - afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); - else - afr_lookup_do(frame, this, 0); + afr_lookup_do(frame, this, 0); return 0; out: @@ -5665,6 +5681,8 @@ afr_priv_dump(xlator_t *this) GF_ATOMIC_GET(priv->pending_reads[i])); sprintf(key, "child_latency[%d]", i); gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]); + sprintf(key, "halo_child_up[%d]", i); + gf_proc_dump_write(key, "%d", priv->halo_child_up[i]); } gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal); gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal); @@ -5677,6 +5695,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -5737,13 +5756,31 @@ __afr_get_up_children_count(afr_private_t *priv) return up_children; } +static 
int +__get_heard_from_all_status(xlator_t *this) +{ + afr_private_t *priv = this->private; + int i; + + for (i = 0; i < priv->child_count; i++) { + if (!priv->last_event[i]) { + return 0; + } + } + if (priv->thin_arbiter_count && !priv->ta_child_up) { + return 0; + } + return 1; +} + glusterfs_event_t -__afr_transform_event_from_state(afr_private_t *priv) +__afr_transform_event_from_state(xlator_t *this) { int i = 0; int up_children = 0; + afr_private_t *priv = this->private; - if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count) + if (__get_heard_from_all_status(this)) /* have_heard_from_all. Let afr_notify() do the propagation. */ return GF_EVENT_MAXVAL; @@ -5785,7 +5822,7 @@ afr_notify_cbk(void *data) goto unlock; } priv->timer = NULL; - event = __afr_transform_event_from_state(priv); + event = __afr_transform_event_from_state(this); if (event != GF_EVENT_MAXVAL) propagate = _gf_true; } @@ -5813,20 +5850,6 @@ __afr_launch_notify_timer(xlator_t *this, afr_private_t *priv) } static int -__get_heard_from_all_status(xlator_t *this) -{ - afr_private_t *priv = this->private; - int i; - - for (i = 0; i < priv->child_count; i++) { - if (!priv->last_event[i]) { - return 0; - } - } - return 1; -} - -static int find_best_down_child(xlator_t *this) { afr_private_t *priv = NULL; @@ -5837,7 +5860,7 @@ find_best_down_child(xlator_t *this) priv = this->private; for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i] && priv->child_latency[i] >= 0 && + if (!priv->child_up[i] && priv->child_latency[i] >= 0 && priv->child_latency[i] < best_latency) { best_child = i; best_latency = priv->child_latency[i]; @@ -5909,7 +5932,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx, "), " "marking child down.", child_latency_msec, halo_max_latency_msec); - *event = GF_EVENT_CHILD_DOWN; + if (priv->halo_child_up[idx]) { + *event = GF_EVENT_CHILD_DOWN; + } } } else if (child_latency_msec < halo_max_latency_msec && priv->child_up[idx] == 0) { @@ -5921,7 +5946,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx, "), " "marking child up.", child_latency_msec, halo_max_latency_msec); - *event = GF_EVENT_CHILD_UP; + if (priv->halo_child_up[idx]) { + *event = GF_EVENT_CHILD_UP; + } } else { gf_log(child_xlator->name, GF_LOG_INFO, "Not marking child %d up, " @@ -5988,7 +6015,10 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator, if (child_latency_msec < 0) { /*set to INT64_MAX-1 so that it is found for best_down_child*/ - priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + priv->halo_child_up[idx] = 1; + if (priv->child_latency[idx] < 0) { + priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + } } /* @@ -6077,6 +6107,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx, */ if (child_latency_msec < 0) { priv->child_latency[idx] = child_latency_msec; + priv->halo_child_up[idx] = 0; } priv->child_up[idx] = 0; @@ -6641,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv) if (!priv) goto out; + + GF_FREE(priv->sh_domain); GF_FREE(priv->last_event); child_count = priv->child_count; @@ -6656,7 +6689,9 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); + GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); LOCK_DESTROY(&priv->lock); @@ -6794,8 +6829,8 @@ afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies, static int afr_update_heal_status(xlator_t *this, 
struct afr_reply *replies, - char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh, - gf_boolean_t *dsh, gf_boolean_t *msh) + ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh, + gf_boolean_t *msh, unsigned char pending) { int ret = -1; GF_UNUSED int ret1 = 0; @@ -6825,14 +6860,7 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, } } - if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { - if (shd_domain_lk_count) { - ret = -EAGAIN; /*For 'possibly-healing'. */ - } else { - ret = 0; /*needs heal. Just set a non -ve value so that it is - assumed as the source index.*/ - } - } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { + if (!pending) { if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) || (!io_domain_lk_count)) { /* Needs heal. */ @@ -6841,6 +6869,13 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, /* No heal needed. */ *dsh = *esh = *msh = 0; } + } else { + if (shd_domain_lk_count) { + ret = -EAGAIN; /*For 'possibly-healing'. */ + } else { + ret = 0; /*needs heal. Just set a non -ve value so that it is + assumed as the source index.*/ + } } return ret; } @@ -6848,8 +6883,8 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, /*return EIO, EAGAIN or pending*/ int afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, - inode_t **inode, char *index_vgfid, - gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, + inode_t **inode, gf_boolean_t *entry_selfheal, + gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, unsigned char *pending) { int ret = -1; @@ -6908,8 +6943,8 @@ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, goto out; } - ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type, - &esh, &dsh, &msh); + ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh, + &msh, *pending); out: *data_selfheal = dsh; *entry_selfheal = esh; @@ -6934,14 +6969,6 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) char *status = NULL; call_frame_t *heal_frame = NULL; afr_local_t *heal_local = NULL; - afr_local_t *local = NULL; - char *index_vgfid = NULL; - - local = frame->local; - if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { - ret = -1; - goto out; - } /*Use frame with lk-owner set*/ heal_frame = afr_frame_create(frame->this, &op_errno); @@ -6952,7 +6979,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) heal_local = heal_frame->local; heal_frame->local = frame->local; - ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode, index_vgfid, + ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode, &entry_selfheal, &data_selfheal, &metadata_selfheal, &pending); @@ -7427,7 +7454,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) + if (ret < priv->child_count) goto data_unlock; ret = __afr_selfheal_data_prepare( heal_frame, this, inode, locked_on, sources, sinks, @@ -7444,7 +7471,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, LLONG_MAX - 1, 0, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) + if (ret < priv->child_count) goto mdata_unlock; ret = __afr_selfheal_metadata_prepare( heal_frame, this, inode, locked_on, sources, sinks, diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 74f71fdc76a..f8bf8340dab 100644 --- 
a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -67,7 +67,8 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } int -afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -163,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -182,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -227,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index e96b7d0798e..b7cceb79158 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -119,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) continue; if (local->replies[i].op_ret < 0) { if (local->inode) - afr_inode_event_gen_reset(local->inode, this); + afr_inode_need_refresh_set(local->inode, this); if (local->parent) - afr_inode_event_gen_reset(local->parent, this); + afr_inode_need_refresh_set(local->parent, this); if (local->parent2) - afr_inode_event_gen_reset(local->parent2, this); + afr_inode_need_refresh_set(local->parent2, this); continue; } @@ -345,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int pre_op_count = 0; int failed_count = 0; + unsigned char *success_replies = NULL; local = frame->local; priv = this->private; @@ -360,9 +361,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) failed_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); + /* FOP succeeded on all bricks. */ if (pre_op_count == priv->child_count && !failed_count) return; + /* FOP did not succeed on a quorum of bricks. 
*/ + success_replies = alloca0(priv->child_count); + afr_fill_success_replies(local, priv, success_replies); + if (!afr_has_quorum(success_replies, this, NULL)) + return; + if (priv->thin_arbiter_count) { /*Mark new entry using ta file*/ local->is_new_entry = _gf_true; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index c01b4131d58..1d6e4f3570a 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -2506,6 +2506,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, call_frame_t *transaction_frame = NULL; int ret = -1; int32_t op_errno = ENOMEM; + int8_t last_fsync = 0; AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out); transaction_frame = copy_frame(frame); @@ -2516,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - if (xdata) + if (xdata) { local->xdata_req = dict_copy_with_ref(xdata, NULL); - else + if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { + if (last_fsync) { + local->transaction.disable_delayed_post_op = _gf_true; + } + } + } else { local->xdata_req = dict_new(); + } if (!local->xdata_req) goto out; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index a5b004f4258..64856042b65 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int spb_choice = 0; + int spb_subvol = 0; int event_generation = 0; int ret = 0; int32_t op_errno = 0; @@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ret = afr_inode_get_readable(frame, local->inode, this, NULL, &event_generation, AFR_DATA_TRANSACTION); if ((ret < 0) && - (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) == - 0) && - spb_choice < 0) { + (afr_split_brain_read_subvol_get(local->inode, this, NULL, + &spb_subvol) == 0) && + spb_subvol < 0) { afr_inode_refresh(frame, this, local->inode, local->inode->gfid, afr_open_continue); } else { diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 772b59f9a2f..6fc2c75145c 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque) xdata_rsp = NULL; /* It doesn't. So query thin-arbiter to see if it blames any data brick. 
*/ - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate thin-arbiter loc for: %s.", loc.name); @@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) int read_subvol = -1; inode_t *inode = NULL; int ret = -1; - int spb_choice = -1; + int spb_subvol = -1; local = frame->local; inode = local->inode; @@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) local->read_attempted[read_subvol] = 1; readfn: if (read_subvol == -1) { - ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice); - if ((ret == 0) && spb_choice >= 0) - read_subvol = spb_choice; + ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol); + if ((ret == 0) && spb_subvol >= 0) + read_subvol = spb_subvol; } if (read_subvol == -1) { diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 16f19e7bc63..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -140,7 +140,7 @@ heal: } } out: - if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) { + if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) { ret = -afr_final_errno(local, priv); } loc_wipe(&loc); @@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, char *accused = NULL; /* Accused others without any self-accusal */ char *pending = NULL; /* Have pending operations on others */ char *self_accused = NULL; /* Accused itself */ - int min_participants = -1; priv = this->private; @@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, } } - if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { - min_participants = priv->child_count; - } else { - min_participants = AFR_SH_MIN_PARTICIPANTS; - } - if (afr_success_count(replies, priv->child_count) < min_participants) { + if (afr_success_count(replies, priv->child_count) < priv->child_count) { /* Treat this just like locks not being acquired */ return -ENOTCONN; } @@ -1911,17 +1905,16 @@ int afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, struct afr_reply *replies) { - afr_private_t *priv = NULL; afr_local_t *local = NULL; dict_t *dict = NULL; - priv = frame->this->private; local = frame->local; - if (local && local->xattr_req) + + if (local->xattr_req) dict = local->xattr_req; return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, - priv->child_up, dict); + local->child_up, dict); } unsigned int @@ -2757,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t 
**linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 
cdff4a57674..37bcc2b3f9e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -225,24 +225,40 @@ __afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd, return ret; } +static gf_boolean_t +afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source, + unsigned char *healed_sinks) +{ + afr_private_t *priv = this->private; + int i = 0; + + if (!locked_on[source]) + return _gf_false; + + for (i = 0; i < priv->child_count; i++) { + if (healed_sinks[i] && locked_on[i]) + return _gf_true; + } + + return _gf_false; +} + static int afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, unsigned char *healed_sinks, off_t offset, size_t size, int type, struct afr_reply *replies) { int ret = -1; - int sink_count = 0; afr_private_t *priv = NULL; unsigned char *data_lock = NULL; priv = this->private; - sink_count = AFR_COUNT(healed_sinks, priv->child_count); data_lock = alloca0(priv->child_count); ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size, data_lock); { - if (ret < sink_count) { + if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) { ret = -ENOTCONN; goto unlock; } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index e7062289c79..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. 
So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, 
AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -598,7 +735,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes " @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? 
We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } @@ -992,7 +1125,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, data_lock); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " @@ -1116,7 +1249,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index f4e31b65bf9..03f43bad16e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -190,6 +190,59 @@ out: return ret; } +static int +__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this, + inode_t *inode, + struct afr_reply *replies, + unsigned char *sources) +{ + int ret = 0; + int i = 0; + int m_idx = 0; + afr_private_t *priv = NULL; + int raw[AFR_NUM_CHANGE_LOGS] = {0}; + dict_t *xattr = NULL; + + priv = this->private; + m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION); + raw[m_idx] = 1; + + xattr = dict_new(); + if (!xattr) + return -ENOMEM; + + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) + continue; + ret = dict_set_static_bin(xattr, priv->pending_key[i], raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) { + ret = -1; + goto out; + } + } + + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO, + "Failed to set pending metadata xattr on child %d for %s", i, + uuid_utoa(inode->gfid)); + goto out; + } + } + + afr_replies_wipe(replies, priv->child_count); + ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); + +out: + if (xattr) + dict_unref(xattr); + return ret; +} + /* * Look for mismatching uid/gid or mode or user xattrs even if * AFR xattrs don't say so, and pick one arbitrarily as winner. 
*/ @@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, }; int source = -1; int sources_count = 0; + int ret = 0; priv = this->private; @@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, healed_sinks[i] = 1; } } - + if ((sources_count == priv->child_count) && (source > -1) && + (AFR_COUNT(healed_sinks, priv->child_count) != 0)) { + ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode, + replies, sources); + if (ret < 0) + return ret; + } out: afr_mark_active_sinks(this, sources, locked_on, healed_sinks); return source; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 36640b5456b..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } @@ -381,7 +352,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode, replies, gfid, locked_on, source, sources, is_gfid_absent, &gfid_idx); - if (ret) + if (ret || (gfid_idx < 0)) return ret; ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname, @@ -514,7 +485,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { ret = -ENOTCONN; goto unlock; } @@ -560,13 +531,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, struct afr_reply *replies = NULL; inode_t *inode = NULL; int first_idx = -1; + afr_local_t *local = NULL; priv = this->private; + local = frame->local; replies = alloca0(sizeof(*replies) * priv->child_count); inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies, - priv->child_up, NULL); + local->child_up, NULL); if (!inode) return -ENOMEM; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 5e7bde8689d..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ 
b/xlators/cluster/afr/src/afr-self-heal.h @@ -11,8 +11,6 @@ #ifndef _AFR_SELFHEAL_H #define _AFR_SELFHEAL_H -#define AFR_SH_MIN_PARTICIPANTS 2 - /* Perform fop on all UP subvolumes and wait for all callbacks to return */ #define AFR_ONALL(frame, rfn, fop, args...) \ @@ -47,13 +45,16 @@ afr_local_t *__local = frame->local; \ afr_private_t *__priv = frame->this->private; \ int __i = 0; \ - int __count = AFR_COUNT(list, __priv->child_count); \ + int __count = 0; \ + unsigned char *__list = alloca(__priv->child_count); \ \ + memcpy(__list, list, sizeof(*__list) * __priv->child_count); \ + __count = AFR_COUNT(__list, __priv->child_count); \ __local->barrier.waitfor = __count; \ afr_local_replies_wipe(__local, __priv); \ \ for (__i = 0; __i < __priv->child_count; __i++) { \ - if (!list[__i]) \ + if (!__list[__i]) \ continue; \ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \ __priv->children[__i], \ @@ -368,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index aa774bb8d51..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer) priv = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + priv->shd.timeout; + wait_till.tv_sec = gf_time() + priv->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) event->split_brain_count = 0; event->heal_failed_count = 0; - time(&event->start_time); + event->start_time = gf_time(); event->end_time = 0; _mask_cancellation(); } @@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer) event = &healer->crawl_event; shd = &(((afr_private_t *)healer->this->private)->shd); - time(&event->end_time); + event->end_time = gf_time(); history = gf_memdup(event, sizeof(*event)); event->start_time = 0; @@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = 
afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. 
Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, @@ -1481,15 +1655,6 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output) case GF_SHD_OP_INDEX_SUMMARY: /* this case has been handled in glfs-heal.c */ break; - case GF_SHD_OP_HEALED_FILES: - case GF_SHD_OP_HEAL_FAILED_FILES: - for (i = 0; i < priv->child_count; i++) { - keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i); - AFR_SET_DICT_AND_LOG(this->name, output, key, keylen, - SOP_NOT_SUPPORTED, - SLEN(SOP_NOT_SUPPORTED)); - } - break; case GF_SHD_OP_SPLIT_BRAIN_FILES: eh_dump(shd->split_brain, output, afr_add_shd_event); break; diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 78438f91331..a51f79b1f43 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque) this = (xlator_t *)opaque; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + 
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } @@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) local->transaction.pre_op_sources[j] = 0; } -gf_boolean_t -afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - xlator_t *this = NULL; - gf_boolean_t fop_failed = _gf_false; - unsigned char *pre_op_sources = NULL; - int i = 0; - - local = frame->local; - this = frame->this; - priv = this->private; - pre_op_sources = local->transaction.pre_op_sources; - - /* If the fop failed on the brick, it is not a source. */ - for (i = 0; i < priv->child_count; i++) - if (local->transaction.failed_subvols[i]) - pre_op_sources[i] = 0; - - switch (AFR_COUNT(pre_op_sources, priv->child_count)) { - case 1: - if (pre_op_sources[ARBITER_BRICK_INDEX]) - fop_failed = _gf_true; - break; - case 0: - fop_failed = _gf_true; - break; - } - - if (fop_failed) - return _gf_false; - - return _gf_true; -} - void afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) { @@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) priv->child_count) return _gf_false; - if (priv->arbiter_count) { - if (!afr_has_arbiter_fop_cbk_quorum(frame)) - need_dirty = _gf_true; - } else if (!afr_has_fop_cbk_quorum(frame)) { + if (!afr_has_fop_cbk_quorum(frame)) need_dirty = _gf_true; - } return need_dirty; } @@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) * no split-brain with the fix. The problem is eliminated completely. */ - if (priv->arbiter_count) { - if (afr_has_arbiter_fop_cbk_quorum(frame)) - return; - } else if (afr_has_fop_cbk_quorum(frame)) { + if (afr_has_fop_cbk_quorum(frame)) return; - } if (afr_need_dirty_marking(frame, this)) goto set_response; @@ -1073,7 +1029,7 @@ set_response: } int -afr_fill_ta_loc(xlator_t *this, loc_t *loc) +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop) { afr_private_t *priv = NULL; @@ -1081,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) loc->parent = inode_ref(priv->root_inode); gf_uuid_copy(loc->pargfid, loc->parent->gfid); loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; + if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) { + /* Except afr_ta_id_file_check() which is path based, all other gluster + * FOPS need gfid.*/ + return -EINVAL; + } gf_uuid_copy(loc->gfid, priv->ta_gfid); loc->inode = inode_new(loc->parent->table); if (!loc->inode) { @@ -1090,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc) return 0; } -int -afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local) -{ - int ret = 0; - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - int failed_count = 0; - struct gf_flock flock = { - 0, - }; - loc_t loc = { - 0, - }; - int i = 0; - - priv = this->private; - if (!priv->thin_arbiter_count) - return 0; - - failed_count = AFR_COUNT(local->transaction.failed_subvols, - priv->child_count); - if (!failed_count) - return 0; - - GF_ASSERT(failed_count == 1); - ret = afr_fill_ta_loc(this, &loc); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Failed to populate thin-arbiter loc for: %s.", loc.name); - goto out; - } - - xattr = dict_new(); - if (!xattr) { - ret = -ENOMEM; - goto out; - } - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_static_bin(xattr, priv->pending_key[i], - local->pending[i], - AFR_NUM_CHANGE_LOGS * sizeof(int)); - if (ret) - goto out; - } - - 
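afr_fill_ta_loc() can now fail before any network hop: every gfid-addressed FOP on the thin-arbiter id file needs priv->ta_gfid to be known, and only the path-based afr_ta_id_file_check() may pass _gf_false and proceed without it. That is why the callers above switch from a hardcoded ENOMEM to logging -ret; a sketch of the expected caller pattern:

    ret = afr_fill_ta_loc(this, &loc, _gf_true);
    if (ret) {
        /* -EINVAL when ta_gfid is still unset, -ENOMEM when the inode
         * allocation failed; either way -ret is the errno to report. */
        gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
               "Failed to populate loc for thin-arbiter.");
        goto out;
    }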
flock.l_type = F_WRLCK; - flock.l_start = 0; - flock.l_len = 0; - - /*TODO: Convert to two domain locking. */ - ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], - AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL); - if (ret) - goto out; - - ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL); - - if (ret == -EINVAL) { - gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB, - "Thin-arbiter has denied post-op on %s for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - - } else if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, - "Post-op on thin-arbiter id file %s failed for gfid %s.", - priv->pending_key[THIN_ARBITER_BRICK_INDEX], - uuid_utoa(local->inode->gfid)); - } - flock.l_type = F_UNLCK; - syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY, - &loc, F_SETLK, &flock, NULL, NULL); -out: - if (xattr) - dict_unref(xattr); - - return ret; -} - static int afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque) { @@ -1264,9 +1145,9 @@ afr_ta_post_op_do(void *opaque) this = local->transaction.frame->this; priv = this->private; - ret = afr_fill_ta_loc(this, &loc); + ret = afr_fill_ta_loc(this, &loc, _gf_true); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB, + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, "Failed to populate loc for thin-arbiter."); goto out; } @@ -2466,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, goto out; } - if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { - /*Only allow writes but shard does [f]xattrops on writes, so + if (local->transaction.disable_delayed_post_op) { + goto out; + } + + if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && + (local->op != GF_FOP_FSYNC)) { + /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so * they are fine too*/ goto out; } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index ec7aa226821..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -168,7 +189,8 @@ reconfigure(xlator_t *this, dict_t *options) bool, out); GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out); - gf_string2boolean(data_self_heal, &priv->data_self_heal); + if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1) + goto out; GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool, out); @@ -289,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io 
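The delayed-changelog decision above now has two early gates: an explicit per-transaction opt-out, and a FOP whitelist that admits fsync alongside writes and the [f]xattrops that shard issues on its write path. The gate order, distilled from the hunk:

    if (local->transaction.disable_delayed_post_op)
        goto out;                            /* caller opted out */

    if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
        (local->op != GF_FOP_FSYNC))
        goto out;                            /* not a write-path FOP */

    /* remaining checks decide whether the post-op may be deferred */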
= _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -485,7 +511,8 @@ init(xlator_t *this) GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out); GF_OPTION_INIT("data-self-heal", data_self_heal, str, out); - gf_string2boolean(data_self_heal, &priv->data_self_heal); + if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1) + goto out; GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str, out); @@ -539,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -551,13 +580,19 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, gf_afr_mt_child_latency_t); + priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1282,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 88456562610..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,7 +191,9 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; + unsigned char *halo_child_up; int64_t *child_latency; unsigned char *local; @@ -274,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -900,7 +908,7 @@ typedef struct _afr_local { gf_boolean_t uninherit_done; gf_boolean_t uninherit_value; - /* post-op hook */ + gf_boolean_t disable_delayed_post_op; } transaction; syncbarrier_t barrier; @@ -996,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, int event_generation); int -afr_inode_event_gen_reset(inode_t *inode, 
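afr_handle_anon_inode_options() derives both the directory name and its gfid deterministically from the volume id, so every client computes identical values with no coordination, and flipping one bit guarantees the anon-dir gfid can never collide with the volume id itself. A worked example with a hypothetical volume id:

    /* volume-id = "7f1c2d3e-0000-4000-8000-123456789abc" (hypothetical)
     *
     * anon_inode_name:
     *   ".glusterfs-anonymous-inode-7f1c2d3e-0000-4000-8000-123456789abc"
     *
     * anon gfid, first byte 0x7f ^ 1 = 0x7e:
     *   "7e1c2d3e-0000-4000-8000-123456789abc"
     */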
xlator_t *this); +__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); + +int +afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); int afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, @@ -1267,8 +1278,8 @@ int afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice); int -afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this, - int *spb_choice); +afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this, + call_frame_t *frame, int *spb_subvol); int afr_get_child_index_from_name(xlator_t *this, char *name); @@ -1353,7 +1364,7 @@ int afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode); int -afr_fill_ta_loc(xlator_t *this, loc_t *loc); +afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop); int afr_ta_post_op_lock(xlator_t *this, loc_t *loc); @@ -1401,4 +1412,12 @@ afr_is_lock_mode_mandatory(dict_t *xdata); void afr_dom_lock_release(call_frame_t *frame); + +void +afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index b762f1a358d..8ba0cc4c732 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -17,6 +17,7 @@ #include <glusterfs/quota-common-utils.h> #include <glusterfs/upcall-utils.h> #include "glusterfs/compat-errno.h" // for ENODATA on BSD +#include <glusterfs/common-utils.h> #include <sys/time.h> #include <libgen.h> @@ -43,15 +44,6 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, static int dht_rmdir_unlock(call_frame_t *frame, xlator_t *this); -char *xattrs_to_heal[] = {"user.", - POSIX_ACL_ACCESS_XATTR, - POSIX_ACL_DEFAULT_XATTR, - QUOTA_LIMIT_KEY, - QUOTA_LIMIT_OBJECTS_KEY, - GF_SELINUX_XATTR_KEY, - GF_XATTR_MDATA_KEY, - NULL}; - static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL}; /* Check the xdata to make sure EBADF has been set by client xlator */ @@ -84,6 +76,8 @@ dht_set_fixed_dir_stat(struct iatt *stat) static gf_boolean_t dht_match_xattr(const char *key) { + char **xattrs_to_heal = get_xattrs_to_heal(); + return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0; } @@ -388,7 +382,7 @@ out: /* Code to save hashed subvol on inode ctx as a mds subvol */ -static int +int dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol) { dht_inode_ctx_t *ctx = NULL; @@ -619,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame) if (local->need_xattr_heal && !heal_path) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, ret, + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DIR_XATTR_HEAL_FAILED, "xattr heal failed for " "directory gfid is %s ", gfid_local); + } } } @@ -695,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; dht_conf_t *conf = 0; dht_layout_t *layout = NULL; + int32_t mds_heal_fresh_lookup = 0; GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, frame->local, out); @@ -702,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; conf = this->private; layout = local->selfheal.layout; + 
mds_heal_fresh_lookup = local->mds_heal_fresh_lookup; if (op_ret) { gf_msg_debug(this->name, op_ret, @@ -722,7 +719,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, layout); } out: - if (local && local->mds_heal_fresh_lookup) + if (mds_heal_fresh_lookup) DHT_STACK_DESTROY(frame); return 0; } @@ -1256,7 +1253,7 @@ err: to non hashed subvol */ int -dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno) { dht_local_t *copy_local = NULL; call_frame_t *copy = NULL; @@ -1268,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "No gfid exists for path %s " "so healing xattr is not possible", local->loc.path); + *op_errno = EIO; goto out; } @@ -1281,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Memory allocation failed " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } else { copy_local->stbuf = local->stbuf; @@ -1295,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local) "Synctask creation failed to heal xattr " "for path %s gfid %s ", local->loc.path, gfid_local); + *op_errno = ENOMEM; DHT_STACK_DESTROY(copy); } } @@ -1435,15 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_aggregate_xattr(local->xattr, xattr); } + if (__is_root_gfid(stbuf->ia_gfid)) { + ret = dht_dir_has_layout(xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->prebuf.ia_ctime, + local->prebuf.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { + /* Choose source */ + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + + local->prebuf.ia_ctime = stbuf->ia_ctime; + local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; + local->prebuf.ia_prot = stbuf->ia_prot; + } + } + } + if (local->stbuf.ia_type != IA_INVAL) { /* This is not the first subvol to respond * Compare values to see if attrs need to be healed */ - if (!__is_root_gfid(stbuf->ia_gfid) && - ((local->stbuf.ia_gid != stbuf->ia_gid) || - (local->stbuf.ia_uid != stbuf->ia_uid) || - (is_permission_different(&local->stbuf.ia_prot, - &stbuf->ia_prot)))) { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || + (is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot))) { local->need_attrheal = 1; } } @@ -1635,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, uint32_t vol_commit_hash = 0; xlator_t *subvol = NULL; int32_t check_mds = 0; - int errst = 0; + int errst = 0, i = 0; int32_t mds_xattr_val[1] = {0}; GF_VALIDATE_OR_GOTO("dht", frame, err); @@ -1702,6 +1718,14 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->need_lookup_everywhere = 1; } else if (IA_ISDIR(local->loc.inode->ia_type)) { + layout = local->layout; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == prev) { + layout->list[i].err = op_errno; + break; + } + } + local->need_selfheal = 1; } } @@ -2137,31 +2161,18 @@ static int dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict) { int ret = 0; - xlator_t *this = NULL; - char *linktoskip_key = NULL; - this = THIS; - GF_VALIDATE_OR_GOTO("dht", this, err); - - if (dht_is_tier_xlator(this)) - linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK; - else - linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK; - - ret = dict_set_int32(dict, linktoskip_key, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1); if (ret) - goto err; + 
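Caching mds_heal_fresh_lookup before winding the heal fixes a read-after-free: the wound path can complete synchronously and release the frame, taking local with it. The same defensive shape, with a hypothetical helper standing in for the heal call:

    int32_t fresh_lookup = local->mds_heal_fresh_lookup;  /* copy first */

    dht_heal_sketch(frame);     /* hypothetical stand-in for the heal
                                   wind; local must not be read after
                                   this point */

    if (fresh_lookup)           /* reads the stack copy, not local */
        DHT_STACK_DESTROY(frame);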
return -1; - ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); + ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1); if (ret) - goto err; + return -1; return 0; - -err: - return -1; } static int32_t @@ -4290,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, index = conf->local_subvols_cnt; uuid_list_copy = gf_strdup(uuid_list); + if (!uuid_list_copy) + goto unlock; for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -4580,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dict_del(xattr, conf->xattr_name); dict_del(xattr, conf->mds_xattr_key); - /* filter out following two xattrs that need not - * be visible on the mount point for geo-rep - - * trusted.tier.fix.layout.complete and - * trusted.tier.tier-dht.commithash - */ - dict_del(xattr, conf->commithash_xattr_name); - if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) { - dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY); - } - if (frame->root->pid >= 0) { GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr); @@ -4865,6 +4868,60 @@ out: return 0; } +/* Virtual Xattr which returns 1 if all subvols are up, + else returns 0. Geo-rep then uses this virtual xattr + after a fresh mount and starts the I/O. +*/ + +enum dht_vxattr_subvol { + DHT_VXATTR_SUBVOLS_UP = 1, + DHT_VXATTR_SUBVOLS_DOWN = 0, +}; + +int +dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this, + const char *key) +{ + dht_local_t *local = NULL; + int ret = -1; + int op_errno = ENODATA; + int value = DHT_VXATTR_SUBVOLS_UP; + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + local = frame->local; + + if (!key) { + op_errno = EINVAL; + goto out; + } + local->xattr = dict_new(); + if (!local->xattr) { + op_errno = ENOMEM; + goto out; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) { + value = DHT_VXATTR_SUBVOLS_DOWN; + gf_msg_debug(this->name, 0, "subvol %s is down ", + conf->subvolumes[i]->name); + break; + } + } + ret = dict_set_int8(local->xattr, (char *)key, value); + if (ret < 0) { + op_errno = -ret; + ret = -1; + goto out; + } + ret = 0; + +out: + DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); + return 0; +} + int dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) @@ -4922,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, goto err; } + if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) { + dht_vgetxattr_subvol_status(frame, this, key); + return 0; + } + /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ if (!DHT_IS_DIR(layout)) goto no_dht_is_dir; @@ -5371,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int call_cnt = 0; dht_local_t *local = NULL; char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char **xattrs_to_heal; conf = this->private; local = frame->local; call_cnt = conf->subvolume_cnt; local->flags = flags; + xattrs_to_heal = get_xattrs_to_heal(); if (!gf_uuid_is_null(local->gfid)) { gf_uuid_unparse(local->gfid, gfid_local); @@ -5808,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, if (local->rebalance.target_node) { local->flags = forced_rebalance; - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded 
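The new virtual xattr is answered entirely from DHT's client-side subvolume table, so the probe never winds to a brick. A consumer such as geo-rep could poll it on a FUSE mount like this (mount path hypothetical; the value is the int8 set above, 1 for DHT_VXATTR_SUBVOLS_UP and 0 for DHT_VXATTR_SUBVOLS_DOWN):

    #include <sys/types.h>
    #include <sys/xattr.h>

    static int
    all_subvols_up(const char *mount_path)
    {
        char val = 0;
        ssize_t n = getxattr(mount_path, "dht.subvol.status", &val,
                             sizeof(val));
        if (n < 0)
            return -1;  /* error, or a server too old for this key */
        return val == 1;
    }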
- * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - tmp = dict_get(xattr, TIERING_MIGRATION_KEY); - if (tmp) - frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG; - else - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; ret = dht_start_rebalance_task(this, frame); if (!ret) @@ -6635,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, layout = local->layout; - /* We have seen crashes in while running "rm -rf" on tier volumes - when the layout was NULL on the hot tier. This will skip the - entries on the subvol without a layout, hence preventing the crash - but rmdir might fail with "directory not empty" errors*/ + /* This will skip the entries on the subvol without a layout, + * hence preventing the crash but rmdir might fail with + * "directory not empty" errors*/ if (layout == NULL) goto done; @@ -8282,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, xlator_t *prev = NULL; int ret = -1; dht_local_t *local = NULL; + gf_boolean_t parent_layout_changed = _gf_false; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + xlator_t *subvol = NULL; + + local = frame->local; local = frame->local; if (!local) { @@ -8290,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, goto out; } - if (op_ret == -1) + if (op_ret == -1) { + local->op_errno = op_errno; + parent_layout_changed = (xdata && + dict_get(xdata, GF_PREOP_CHECK_FAILED)) + ? _gf_true + : _gf_false; + + if (parent_layout_changed) { + if (local && local->lock[0].layout.parent_layout.locks) { + /* Returning failure as the layout could not be fixed even under + * the lock */ + goto out; + } + + gf_uuid_unparse(local->loc.parent->gfid, pgfid); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, + "create (%s/%s) (path: %s): parent layout " + "changed. Attempting a layout refresh and then a " + "retry", + pgfid, local->loc.name, local->loc.path); + + /* + dht_refresh_layout needs directory info in local->loc.Hence, + storing the parent_loc in local->loc and storing the create + context in local->loc2. We will restore this information in + dht_creation_do. 
+ */ + + loc_wipe(&local->loc2); + + ret = loc_copy(&local->loc2, &local->loc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "loc_copy failed %s", local->loc.path); + + goto out; + } + + loc_wipe(&local->loc); + + ret = dht_build_parent_loc(this, &local->loc, &local->loc2, + &op_errno); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, + "parent loc build failed"); + goto out; + } + + subvol = dht_subvol_get_hashed(this, &local->loc2); + + ret = dht_create_lock(frame, subvol); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, + "locking parent failed"); + goto out; + } + + return 0; + } + goto out; + } prev = cookie; @@ -8412,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8420,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, avail_subvol = dht_free_disk_available_subvol(this, subvol, local); if (avail_subvol != subvol) { - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = avail_subvol; local->hashed_subvol = subvol; @@ -8439,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, subvol->name); + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, loc, flags, mode, umask, fd, params); @@ -8654,7 +8768,7 @@ err: return 0; } -static int32_t +int32_t dht_create_lock(call_frame_t *frame, xlator_t *subvol) { dht_local_t *local = NULL; @@ -8700,6 +8814,60 @@ err: } int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) +{ + dht_conf_t *conf = this->private; + dht_layout_t *parent_layout = NULL; + int *parent_disk_layout = NULL; + xlator_t *hashed_subvol = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + int ret = 0; + + gf_uuid_unparse(loc->parent->gfid, pgfid); + + parent_layout = dht_layout_get(this, loc->parent); + hashed_subvol = dht_subvol_get_hashed(this, loc); + + ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, + &parent_disk_layout); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "extracting in-memory layout of parent failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + + ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, + conf->xattr_name); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting %s key in params dictionary failed. ", + gf_fop_list[local->fop], pgfid, loc->name, loc->path, + GF_PREOP_PARENT_KEY); + goto err; + } + + ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, + 4 * 4); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_PARENT_LAYOUT_CHANGED, + "%s (%s/%s) (path: %s): " + "setting parent-layout in params dictionary failed. 
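Taken together, dht_set_parent_layout_in_dict() and the retry branch in dht_create_cbk() form an optimistic pre-op check: the client ships the parent layout it believes in, the brick verifies it, and a mismatch is repaired under lock before exactly one retry. The round trip, summarized with the names used in this patch:

    /* dht_create()
     *   -> dht_set_parent_layout_in_dict(): pack GF_PREOP_PARENT_KEY
     *      plus the 4x4-byte on-wire layout into params
     *   -> STACK_WIND(create): the brick compares against its on-disk
     *      parent layout
     *
     * dht_create_cbk(), op_ret == -1:
     *   xdata carries GF_PREOP_CHECK_FAILED
     *   -> dht_create_lock() on the hashed subvol, refresh the layout,
     *      retry the create once; a second mismatch (the parent-layout
     *      lock is already held) fails the create for good. */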
", + gf_fop_list[local->fop], pgfid, loc->name, loc->path); + goto err; + } + +err: + dht_layout_unref(this, parent_layout); + return ret; +} + +int dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { @@ -8725,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } + local->params = dict_ref(params); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, "creating %s on %s (got create on %s)", local->loc.path, @@ -8740,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (hashed_subvol && (hashed_subvol != subvol)) { /* Create the linkto file and then the data file */ - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; local->cached_subvol = subvol; local->hashed_subvol = hashed_subvol; @@ -8756,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, * file as we expect a lookup everywhere if there are problems * with the parent layout */ + + dht_set_parent_layout_in_dict(loc, this, local); + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, subvol->fops->create, &local->loc, flags, mode, umask, fd, params); @@ -8807,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, goto err; } - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - loc_wipe(&local->loc); ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); @@ -10646,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...) int had_heard_from_all = 0; int have_heard_from_all = 0; - struct timeval time = { - 0, - }; gf_defrag_info_t *defrag = NULL; dict_t *dict = NULL; gf_defrag_type cmd = 0; dict_t *output = NULL; va_list ap; - dht_methods_t *methods = NULL; struct gf_upcall *up_data = NULL; struct gf_upcall_cache_invalidation *up_ci = NULL; conf = this->private; GF_VALIDATE_OR_GOTO(this->name, conf, out); - methods = &(conf->methods); - /* had all subvolumes reported status once till now? */ had_heard_from_all = 1; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -10692,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...) break; } - gettimeofday(&time, NULL); LOCK(&conf->subvolume_lock); { conf->subvolume_status[cnt] = 1; conf->last_event[cnt] = event; - conf->subvol_up_time[cnt] = time.tv_sec; + conf->subvol_up_time[cnt] = gf_time(); } UNLOCK(&conf->subvolume_lock); @@ -10805,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
if (defrag->is_exiting) goto unlock; if ((cmd == GF_DEFRAG_CMD_STATUS) || - (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) gf_defrag_status_get(conf, output); - else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - gf_defrag_start_detach_tier(defrag); else if (cmd == GF_DEFRAG_CMD_DETACH_START) defrag->cmd = GF_DEFRAG_CMD_DETACH_START; else if (cmd == GF_DEFRAG_CMD_STOP || - cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || cmd == GF_DEFRAG_CMD_DETACH_STOP) gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output); - else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) - ret = gf_defrag_pause_tier(this, defrag); - else if (cmd == GF_DEFRAG_CMD_RESUME_TIER) - ret = gf_defrag_resume_tier(this, defrag); } unlock: UNLOCK(&defrag->lock); @@ -10894,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...) * thread has already started. */ if (conf->defrag && !run_defrag) { - if (methods->migration_needed(this)) { - run_defrag = 1; - ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, - this, "dhtdg"); - if (ret) { - GF_FREE(conf->defrag); - conf->defrag = NULL; - kill(getpid(), SIGTERM); - } + run_defrag = 1; + ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start, + this, "dhtdg"); + if (ret) { + GF_FREE(conf->defrag); + conf->defrag = NULL; + kill(getpid(), SIGTERM); } } } @@ -11047,28 +11197,6 @@ out: return ret; } -int32_t -dht_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO("dht", conf, out); - GF_VALIDATE_OR_GOTO("dht", conf->defrag, out); - - defrag = conf->defrag; - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; - -out: - return ret; -} - /* This function should not be called more then once during a FOP handling path. It is valid only for for ops on files @@ -11103,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf, return 0; } -gf_boolean_t -dht_is_tier_xlator(xlator_t *this) -{ - if (strcmp(this->type, "cluster/tier") == 0) - return _gf_true; - return _gf_false; -} - int32_t dht_release(xlator_t *this, fd_t *fd) { @@ -11250,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); return 0; } + +/* The job of this function is to check if all the xlators have updated + * error in the layout. 
*/ +int +dht_dir_layout_error_check(xlator_t *this, inode_t *inode) +{ + dht_layout_t *layout = NULL; + int i = 0; + + layout = dht_layout_get(this, inode); + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == 0) { + return 0; + } + } + + /* Returning the first xlator error as all xlators have errors */ + return layout->list[0].err; +} diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 288bdf7be38..fe0dc3db34a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,7 +24,6 @@ #define _DHT_H #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" -#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" #define DHT_MDS_STR "mds" #define GF_DHT_LOOKUP_UNHASHED_OFF 0 @@ -36,22 +35,21 @@ #define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal" /* Namespace synchronization */ #define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync" -#define TIERING_MIGRATION_KEY "tiering.migration" #define DHT_LAYOUT_HASH_INVALID 1 #define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN) #define DHT_DIR_STAT_BLOCKS 8 #define DHT_DIR_STAT_SIZE 4096 +/* Virtual xattr for subvols status */ + +#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status" + /* Virtual xattrs for debugging */ #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" #define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol." -/* Array to hold custom xattr keys - */ -extern char *xattrs_to_heal[]; - /* Rebalance nodeuuid flags */ #define REBAL_NODEUUID_MINE 0x01 @@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame, dht_layout_t **inmem, dht_layout_t **ondisk); -typedef struct { - uint64_t blocks_used; - uint64_t pblocks_used; - uint64_t files_used; - uint64_t pfiles_used; - uint64_t unhashed_blocks_used; - uint64_t unhashed_pblocks_used; - uint64_t unhashed_files_used; - uint64_t unhashed_pfiles_used; - uint64_t unhashed_fsid; - uint64_t hashed_fsid; -} tier_statvfs_t; - struct dht_local { loc_t loc; loc_t loc2; @@ -272,7 +257,6 @@ struct dht_local { struct iatt preparent; struct iatt postparent; struct statvfs statvfs; - tier_statvfs_t tier_statvfs; fd_t *fd; inode_t *inode; dict_t *params; @@ -405,14 +389,7 @@ enum gf_defrag_type { GF_DEFRAG_CMD_STATUS = 1 + 2, GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3, GF_DEFRAG_CMD_START_FORCE = 1 + 4, - GF_DEFRAG_CMD_START_TIER = 1 + 5, - GF_DEFRAG_CMD_STATUS_TIER = 1 + 6, - GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7, - GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8, - GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9, - GF_DEFRAG_CMD_RESUME_TIER = 1 + 10, GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11, - GF_DEFRAG_CMD_STOP_TIER = 1 + 12, GF_DEFRAG_CMD_DETACH_START = 1 + 13, GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14, GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15, @@ -463,75 +440,6 @@ struct dht_container { int local_subvol_index; }; -typedef enum tier_mode_ { - TIER_MODE_NONE = 0, - TIER_MODE_TEST, - TIER_MODE_WM -} tier_mode_t; - -typedef enum tier_pause_state_ { - TIER_RUNNING = 0, - TIER_REQUEST_PAUSE, - TIER_PAUSED -} tier_pause_state_t; - -/* This Structure is only used in tiering fixlayout */ -typedef struct gf_tier_fix_layout_arg { - xlator_t *this; - dict_t *fix_layout; - pthread_t thread_id; -} gf_tier_fix_layout_arg_t; - -typedef struct gf_tier_conf { - int is_tier; - int watermark_hi; - int watermark_low; - int watermark_last; - unsigned long block_size; - fsblkcnt_t blocks_total; - fsblkcnt_t blocks_used; - uint64_t max_migrate_bytes; - int 
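dht_dir_layout_error_check() stays silent while any subvol still has a clean slot in the layout; only a unanimous failure surfaces, and then the first entry's errno is as good as any. A hypothetical call site (the revalidate path above records the per-subvol errors, but this exact usage is an assumption):

    int op_errno = dht_dir_layout_error_check(this, local->loc.inode);
    if (op_errno != 0) {
        /* every layout slot carries an error: fail with the recorded
         * errno rather than inventing one */
        local->op_errno = op_errno;
    }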
max_migrate_files; - int query_limit; - tier_mode_t mode; - int percent_full; - /* These flags are only used for tier-compact */ - gf_boolean_t compact_active; - /* These 3 flags are set to true when the client changes the */ - /* compaction mode on the command line. */ - /* When they are set, the daemon will trigger compaction as */ - /* soon as possible to activate or deactivate compaction. */ - /* If in the middle of a compaction, then the switches take */ - /* effect on the next compaction, not the current one. */ - /* If the user switches it off, we want to avoid needless */ - /* compactions. */ - /* If the user switches it on, they want to compact as soon */ - /* as possible. */ - gf_boolean_t compact_mode_switched; - gf_boolean_t compact_mode_switched_hot; - gf_boolean_t compact_mode_switched_cold; - int tier_max_promote_size; - int tier_promote_frequency; - int tier_demote_frequency; - int tier_compact_hot_frequency; - int tier_compact_cold_frequency; - uint64_t st_last_promoted_size; - uint64_t st_last_demoted_size; - struct synctask *pause_synctask; - gf_timer_t *pause_timer; - pthread_mutex_t pause_mutex; - int promote_in_progress; - int demote_in_progress; - /* This Structure is only used in tiering fixlayout */ - gf_tier_fix_layout_arg_t tier_fix_layout_arg; - /* Indicates the index of the first queryfile picked - * in the last cycle of promote or demote */ - int32_t last_promote_qfile_index; - int32_t last_demote_qfile_index; - tier_pause_state_t pause_state; - char volname[GD_VOLUME_NAME_MAX + 1]; -} gf_tier_conf_t; - typedef struct nodeuuid_info { char info; /* Set to 1 is this is my node's uuid*/ uuid_t uuid; /* Store the nodeuuid as well for debugging*/ @@ -559,17 +467,10 @@ struct gf_defrag_info_ { int cmd; inode_t *root_inode; uuid_t node_uuid; - struct timeval start_time; + time_t start_time; uint32_t new_commit_hash; gf_defrag_status_t defrag_status; gf_defrag_pattern_list_t *defrag_pattern; - gf_tier_conf_t tier_conf; - - /*Data Tiering params for scanner*/ - uint64_t total_files_promoted; - uint64_t total_files_demoted; - int write_freq_threshold; - int read_freq_threshold; pthread_cond_t parallel_migration_cond; pthread_mutex_t dfq_mutex; @@ -605,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t; struct dht_methods_s { int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local); int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag); - int32_t (*migration_needed)(xlator_t *this); xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout, const char *name); }; @@ -626,7 +526,7 @@ struct dht_conf { int subvolume_cnt; int32_t refresh_interval; gf_lock_t subvolume_lock; - struct timeval last_stat_fetch; + time_t last_stat_fetch; gf_lock_t layout_lock; dict_t *leaf_to_subvol; void *private; /* Can be used by wrapper xlators over @@ -748,6 +648,8 @@ struct dir_dfmeta { struct list_head **head; struct list_head **iterator; int *fetch_entries; + /* fds corresponding to local subvols only */ + fd_t **lfd; }; typedef struct dht_migrate_info { @@ -879,7 +781,6 @@ dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout, int dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, loc_t *loc, dict_t *xattr); - xlator_t * dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); @@ -897,9 +798,6 @@ int dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos, int32_t **disk_layout_p); int -dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos, - 
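start_time and last_stat_fetch only ever held whole seconds, so struct timeval shrinks to time_t and the gettimeofday() calls elsewhere in this patch become gf_time(), a libglusterfs helper returning time_t (assumed to be a thin wrapper over time(2)). The shape of the change:

    struct timeval tv;                        /* before */
    gettimeofday(&tv, NULL);
    conf->subvol_up_time[cnt] = tv.tv_sec;

    conf->subvol_up_time[cnt] = gf_time();    /* after */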
void *disk_layout_raw, int disk_layout_len); -int dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, int32_t **disk_layout_p); @@ -945,7 +843,7 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, int dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); -int +void dht_layout_sort_volname(dht_layout_t *layout); int @@ -962,14 +860,14 @@ dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx); int dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode); int -dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol); -int dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout); ; void dht_layout_unref(xlator_t *this, dht_layout_t *layout); dht_layout_t * dht_layout_ref(xlator_t *this, dht_layout_t *layout); +int +dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol); xlator_t * dht_first_up_subvol(xlator_t *this); xlator_t * @@ -1236,30 +1134,8 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); int -dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); - -int gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); -void -gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state); - -tier_pause_state_t -gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf); - -int -gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag); - -tier_pause_state_t -gf_defrag_check_pause_tier(gf_tier_conf_t *defrag); - -int -gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag); - -int -gf_defrag_start_detach_tier(gf_defrag_info_t *defrag); - int gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output); @@ -1291,10 +1167,6 @@ int dht_dir_attr_heal(void *data); int dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data); -int -dht_dir_has_layout(dict_t *xattr, char *name); -gf_boolean_t -dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator); xlator_t * dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, xlator_t *ignore, dht_layout_t *layout, @@ -1303,15 +1175,18 @@ xlator_t * dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol, dht_layout_t *layout); int +dht_dir_has_layout(dict_t *xattr, char *name); +int dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this); -void -dht_layout_dump(dht_layout_t *layout, const char *prefix); int32_t dht_priv_dump(xlator_t *this); int32_t dht_inodectx_dump(xlator_t *this, inode_t *inode); +gf_boolean_t +dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator); + int dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode, xlator_t **src_subvol, xlator_t **dst_subvol); @@ -1341,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout); int dht_refresh_layout(call_frame_t *frame); -gf_boolean_t -dht_is_tier_xlator(xlator_t *this); - int dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); @@ -1456,7 +1328,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, dict_t *src, int *uret, int *uflag); int -dht_dir_xattr_heal(xlator_t *this, dht_local_t *local); +dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno); int dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag); @@ -1498,4 +1370,15 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, 
xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); +int32_t +dht_create_lock(call_frame_t *frame, xlator_t *subvol); + +int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); + +int +dht_dir_layout_error_check(xlator_t *this, inode_t *inode); + +int +dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol); #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 27097ca2475..c0588828fdb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) dht_conf_t *conf = NULL; call_frame_t *statfs_frame = NULL; dht_local_t *statfs_local = NULL; - struct timeval tv = { - 0, - }; loc_t tmp_loc = { 0, }; + time_t now; conf = this->private; - - gettimeofday(&tv, NULL); - + now = gf_time(); /* make it root gfid, should be enough to get the proper info back */ tmp_loc.gfid[15] = 1; - if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) { + if (now > (conf->refresh_interval + conf->last_stat_fetch)) { statfs_frame = copy_frame(frame); if (!statfs_frame) { goto err; @@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc) statfs_local->params); } - conf->last_stat_fetch.tv_sec = tv.tv_sec; + conf->last_stat_fetch = now; } return 0; err: diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c index 1dfe2a77907..acda67c312a 100644 --- a/xlators/cluster/dht/src/dht-hashfn.c +++ b/xlators/cluster/dht/src/dht-hashfn.c @@ -78,6 +78,9 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p) priv = this->private; + if (name == NULL) + return -1; + len = strlen(name) + 1; rsync_friendly_name = alloca(len); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 023d4371c0e..3f2fe43d5f3 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -64,8 +64,8 @@ __dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst) ret = __fd_ctx_set(fd, this, value); if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED, - "Failed to set fd ctx in fd=0x%p", fd); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED, + "fd=0x%p", fd, NULL); GF_REF_PUT(fd_ctx); } out: @@ -96,8 +96,8 @@ dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst) /* Overwrite and hope for the best*/ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst; UNLOCK(&fd->lock); - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE, - "Different dst found in the fd ctx"); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE, + NULL); goto out; } @@ -383,9 +383,9 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame, &local->rebalance.flock, local->xattr_req); break; default: - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, - "Unknown FOP on fd (%p) on file %s @ %s", fd, - uuid_utoa(fd->inode->gfid), subvol->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p", + fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s", + subvol->name, NULL); break; } @@ -458,9 +458,9 @@ handle_err: break; default: - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, - "Unknown FOP on fd (%p) on file %s @ %s", fd, - uuid_utoa(fd->inode->gfid), subvol->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p", + 
fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s", + subvol->name, NULL); break; } @@ -513,10 +513,9 @@ dht_check_and_open_fd_on_subvol_task(void *data) fd, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED, - "Failed to open the fd" - " (%p, flags=0%o) on file %s @ %s", - fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED, + "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s", + uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL); /* This can happen if the cached subvol was updated in the * inode_ctx and the fd was opened on the new cached suvol * after this fop was wound on the old cached subvol. @@ -562,10 +561,8 @@ dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame) dht_check_and_open_fd_on_subvol_complete, frame, frame); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, 0, - "Failed to create synctask" - " to check and open fd=%p", - local->fd); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED, + "to-check-and-open fd=%p", local->fd, NULL); } return ret; @@ -674,9 +671,7 @@ dht_get_subvol_from_id(xlator_t *this, int client_id) ret = gf_asprintf(&sid, "%d", client_id); if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, - "asprintf failed while " - "fetching subvol from the id"); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL); goto out; } @@ -1336,9 +1331,9 @@ dht_migration_complete_check_task(void *data) * migrated by two different layers. Raise * a warning here. */ - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, - "%s: Found miginfo in the inode ctx", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid)); + gf_smsg( + this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL); miginfo = (void *)(uintptr_t)tmp_miginfo; GF_REF_PUT(miginfo); @@ -1359,10 +1354,9 @@ dht_migration_complete_check_task(void *data) ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED, - "%s: failed to lookup the file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), - this->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED, + "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + "name=%s", this->name, NULL); local->op_errno = -ret; ret = -1; goto out; @@ -1370,18 +1364,15 @@ dht_migration_complete_check_task(void *data) dst_node = dht_subvol_get_cached(this, tmp_loc.inode); if (linkto_target && dst_node != linkto_target) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE, - "linkto target (%s) is " - "different from cached-subvol (%s). Treating %s as " - "destination subvol", - linkto_target->name, dst_node->name, dst_node->name); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE, + "linkto_target_name=%s", linkto_target->name, "dst_name=%s", + dst_node->name, NULL); } if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, - "%s: gfid different on the target file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), - dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s", + tmp_loc.path ? 
tmp_loc.path : uuid_utoa(tmp_loc.gfid), + "dst_name=%s", dst_node->name, NULL); ret = -1; local->op_errno = EIO; goto out; @@ -1463,12 +1454,10 @@ dht_migration_complete_check_task(void *data) (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_OPEN_FD_ON_DST_FAILED, - "failed" - " to open the fd" - " (%p, flags=0%o) on file %s @ %s", - iter_fd, iter_fd->flags, path, dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd, + "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s", + dst_node->name, NULL); open_failed = 1; local->op_errno = -ret; @@ -1622,9 +1611,9 @@ dht_rebalance_inprogress_task(void *data) * migrated by two different layers. Raise * a warning here. */ - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, - "%s: Found miginfo in the inode ctx", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid)); + gf_smsg( + this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s", + tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL); miginfo = (void *)(uintptr_t)tmp_miginfo; GF_REF_PUT(miginfo); } @@ -1633,17 +1622,16 @@ dht_rebalance_inprogress_task(void *data) } if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED, - "%s: failed to get the 'linkto' xattr", local->loc.path); + gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED, + "path=%s", local->loc.path, NULL); ret = -1; goto out; } dst_node = dht_linkfile_subvol(this, NULL, NULL, dict); if (!dst_node) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND, - "%s: failed to get the 'linkto' xattr from dict", - local->loc.path); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED, + "path=%s", local->loc.path, NULL); ret = -1; goto out; } @@ -1660,20 +1648,17 @@ dht_rebalance_inprogress_task(void *data) /* lookup on dst */ ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_FILE_LOOKUP_ON_DST_FAILED, - "%s: failed to lookup the file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), - dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED, + "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), + "name=%s", dst_node->name, NULL); ret = -1; goto out; } if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, - "%s: gfid different on the target file on %s", - tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), - dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s", + tmp_loc.path ? 
tmp_loc.path : uuid_utoa(tmp_loc.gfid), + "name=%s", dst_node->name, NULL); ret = -1; goto out; } @@ -1741,11 +1726,10 @@ dht_rebalance_inprogress_task(void *data) (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_OPEN_FD_ON_DST_FAILED, - "failed to send open " - "the fd (%p, flags=0%o) on file %s @ %s", - iter_fd, iter_fd->flags, path, dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd, + "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s", + dst_node->name, NULL); ret = -1; open_failed = 1; } else { @@ -1777,9 +1761,8 @@ unlock: ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, - "%s: failed to set inode-ctx target file at %s", local->loc.path, - dst_node->name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "path=%s", local->loc.path, "name=%s", dst_node->name, NULL); goto out; } @@ -2001,10 +1984,9 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable) ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED, - "Healing of path %s failed on subvolume %s for " - "directory %s", - path, this->name, bname); + gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED, + "path=%s", path, "subvolume=%s", this->name, "bname=%s", + bname, NULL); goto out; } @@ -2062,10 +2044,8 @@ dht_heal_full_path(void *data) ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED, - "Failed to get path from subvol %s. Aborting " - "directory healing.", - source->name); + gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT, + "subvol=%s", source->name, NULL); goto out; } @@ -2103,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) dht_local_t *local = NULL; xlator_t *this = NULL; int ret = -1; + int op_errno = 0; local = heal_frame->local; main_frame = local->main_frame; @@ -2112,10 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data) dht_set_fixed_dir_stat(&local->postparent); if (local->need_xattr_heal) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for directory %s ", local->loc.path); + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path, + NULL); + } } DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf, @@ -2203,8 +2186,8 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock, if (ret) { gf_uuid_unparse(inode->gfid, gfid); UNLOCK(&inode->lock); - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set lock_subvol in inode ctx for gfid %s", gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "lock_subvol gfid=%s", gfid, NULL); goto post_unlock; } subvol = cached_subvol; @@ -2234,8 +2217,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret) inode = local->loc.inode ? local->loc.inode : local->fd->inode; } if (!inode) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED, - "Found a NULL inode. 
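Every logging conversion in this file follows one recipe: the English sentence moves into the message catalog behind its DHT_MSG_* id, and the call site keeps only "key=value" fragments terminated by a NULL sentinel. One pair from the hunks above, side by side:

    gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
           "Failed to set lock_subvol in inode ctx for gfid %s", gfid);

    gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
            "lock_subvol gfid=%s", gfid, NULL);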
Failed to unref the inode"); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED, + NULL); goto out; } @@ -2261,11 +2244,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret) inode_unref(inode); } else { gf_uuid_unparse(inode->gfid, gfid); - gf_msg(this->name, GF_LOG_WARNING, 0, - DHT_MSG_LOCK_INODE_UNREF_FAILED, - "Unlock request failed for gfid %s." - "Failed to unref the inode", - gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL); goto out; } default: @@ -2287,12 +2267,11 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, int luret = -1; int luflag = -1; int i = 0; + char **xattrs_to_heal; if (!src || !dst) { - gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED, - "src or dst is NULL. Failed to set " - " dictionary value for path %s", - local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED, + "path=%s", local->loc.path, NULL); return; } /* Check if any user xattr present in src dict and set @@ -2303,17 +2282,18 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst, and set it to dst dict, here index start from 1 because user xattr already checked in previous statement */ + + xattrs_to_heal = get_xattrs_to_heal(); + for (i = 1; xattrs_to_heal[i]; i++) { keyval = dict_get(src, xattrs_to_heal[i]); if (keyval) { luflag = 1; ret = dict_set(dst, xattrs_to_heal[i], keyval); if (ret) - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", - xattrs_to_heal[i], local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i], + "path=%s", local->loc.path, NULL); keyval = NULL; } } diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 8007fc70693..dbb8070b0da 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -10,25 +10,25 @@ #include "dht-common.h" -int +static int dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret); -int +static int dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { @@ -67,7 +67,7 @@ out: return 0; } -int +static int dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -216,7 +216,7 @@ err: return 0; } -int +static int dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -258,7 +258,7 @@ out: return 0; } -int +static int dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata) { @@ -473,7 +473,7 @@ out: return 0; } -int +static int dht_readv2(xlator_t 
*this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -558,7 +558,7 @@ err: return 0; } -int +static int dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -606,7 +606,7 @@ out: return 0; } -int +static int dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -735,7 +735,7 @@ out: return 0; } -int +static int dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -881,7 +881,7 @@ out: return 0; } -int +static int dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -959,7 +959,7 @@ err: /* TODO: for 'lk()' call, we need some other special error, may be ESTALE to indicate that lock migration happened on the fd, so we can consider it as phase 2 of migration */ -int +static int dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata) { @@ -1006,7 +1006,7 @@ out: return 0; } -int +static int dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -1087,7 +1087,7 @@ err: return 0; } -int +static int dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct gf_lease *lease, dict_t *xdata) { @@ -1129,7 +1129,7 @@ err: } /* Symlinks are currently not migrated, so no need for any check here */ -int +static int dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, const char *path, struct iatt *stbuf, dict_t *xdata) @@ -1305,7 +1305,7 @@ out: return 0; } -int +static int dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { @@ -1364,7 +1364,7 @@ out: return 0; } -int +static int dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { @@ -1452,7 +1452,7 @@ err: return 0; } -int +static int dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { @@ -1525,7 +1525,7 @@ err: * below fops, hence not implementing 'migration' related checks */ -int +static int dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index b6b349d64ed..2f23ce90fbd 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -10,17 +10,17 @@ #include "dht-common.h" -int +static int dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); -int +static int dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret); int @@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1(postbuf)) { - if (!dht_is_tier_xlator(this)) { + if (!local->xattr_req) { + local->xattr_req = 
dict_new(); if (!local->xattr_req) { - local->xattr_req = dict_new(); - if (!local->xattr_req) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "insufficient memory"); - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - - ret = dict_set_uint32(local->xattr_req, - GF_PROTECT_FROM_EXTERNAL_WRITES, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0, - "Failed to set key %s in dictionary", - GF_PROTECT_FROM_EXTERNAL_WRITES); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "insufficient memory"); local->op_errno = ENOMEM; local->op_ret = -1; goto out; } } + ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + dht_iatt_merge(this, &local->stbuf, postbuf); dht_iatt_merge(this, &local->prebuf, prebuf); @@ -142,7 +140,7 @@ out: return 0; } -int +static int dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -336,7 +334,7 @@ err: return 0; } -int +static int dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -555,7 +553,7 @@ err: return 0; } -int +static int dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -731,7 +729,7 @@ err: return 0; } -int +static int dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -902,7 +900,7 @@ err: return 0; } -int +static int dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; @@ -1049,7 +1047,7 @@ out: return 0; } -int +static int dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) { dht_local_t *local = NULL; diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index dc14d72c1ff..fda904c92c9 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -131,9 +131,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name) ret = dht_hash_compute(this, layout->type, name, &hash); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED, - "hash computation failed for type=%d name=%s", layout->type, - name); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED, + "type=%d", layout->type, "name=%s", name, NULL); goto out; } @@ -145,8 +144,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name) } if (!subvol) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "no subvolume for hash (value) = 0x%x", hash); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "hash-value=0x%x", hash, NULL); } out: @@ -255,7 +254,7 @@ dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout, return dht_disk_layout_extract(this, layout, i, disk_layout_p); } -int +static int dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos, void *disk_layout_raw, int disk_layout_len) { @@ -266,8 +265,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos, int disk_layout[4]; if (!disk_layout_raw) { - gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED, - "error no layout on disk for merge"); +
gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + NULL); return -1; } @@ -284,10 +283,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos, case DHT_HASH_TYPE_DM: break; default: - gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT, - "Invalid disk layout: " - "Catastrophic error layout with unknown type found %d", - disk_layout[1]); + gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT, + "layout=%d", disk_layout[1], NULL); return -1; } @@ -355,8 +352,8 @@ dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw, disk_layout_len); if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, - "layout merge from subvolume %s failed", subvol->name); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED, + "subvolume=%s", subvol->name, NULL); goto out; } @@ -415,8 +412,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j) layout->list[j].start = start_swap; layout->list[j].stop = stop_swap; } - -int64_t +static int64_t dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j) { return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name)); @@ -439,7 +435,7 @@ dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator) return _gf_false; } -int64_t +static int64_t dht_layout_entry_cmp(dht_layout_t *layout, int i, int j) { int64_t diff = 0; @@ -475,7 +471,7 @@ dht_layout_sort(dht_layout_t *layout) return 0; } -int +void dht_layout_sort_volname(dht_layout_t *layout) { int i = 0; @@ -491,8 +487,6 @@ dht_layout_sort_volname(dht_layout_t *layout) dht_layout_entry_swap(layout, i, j); } } - - return 0; } void @@ -625,8 +619,8 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout) ret = dht_layout_sort(layout); if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED, - "sort failed?! how the ...."); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED, + NULL); goto out; } @@ -642,10 +636,9 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout) " gfid = %s", loc->path, gfid); } else { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO, - "Found anomalies in %s (gfid = %s). 
" - "Holes=%d overlaps=%d", - loc->path, gfid, holes, overlaps); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO, + "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes, + "overlaps=%d", overlaps, NULL); } ret = -1; } @@ -712,12 +705,11 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, if (!xattr) { if (err == 0) { if (loc) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED, - "%s: xattr dictionary is NULL", loc->path); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL, + "path=%s", loc->path, NULL); } else { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED, - "path not found: " - "xattr dictionary is NULL"); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL, + "path not found", NULL); } ret = -1; } @@ -729,13 +721,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, if (dict_ret < 0) { if (err == 0 && layout->list[pos].stop) { if (loc) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, - "%s: Disk layout missing, gfid = %s", loc->path, gfid); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, + "path=%s", loc->path, "gfid=%s", gfid, NULL); } else { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, - "path not found: " - "Disk layout missing, gfid = %s", - gfid); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING, + "path not found" + "gfid=%s", + gfid, NULL); } ret = -1; } @@ -751,12 +743,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol, if ((layout->list[pos].start != start_off) || (layout->list[pos].stop != stop_off) || (layout->list[pos].commit_hash != commit_hash)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, - "subvol: %s; inode layout: 0x%x - 0x%x, 0x%x; " - "disk layout: 0x%x - 0x%x, 0x%x", - layout->list[pos].xlator->name, layout->list[pos].start, - layout->list[pos].stop, layout->list[pos].commit_hash, start_off, - stop_off, commit_hash); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s", + layout->list[pos].xlator->name, "inode-layout:start=0x%x", + layout->list[pos].start, "inode-layout:stop=0x%x", + layout->list[pos].stop, "layout-commit-hash=0x%x; ", + layout->list[pos].commit_hash, "disk-layout:start-off=0x%x", + start_off, "disk-layout:top-off=0x%x", stop_off, + "commit-hash=0x%x", commit_hash, NULL); ret = 1; } else { ret = 0; @@ -778,9 +771,8 @@ dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode) layout = dht_layout_for_subvol(this, subvol); if (!layout) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO, - "no pre-set layout for subvolume %s", - subvol ? subvol->name : "<nil>"); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO, + "subvolume=%s", subvol ? 
subvol->name : "<nil>", NULL); ret = -1; goto out; } diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c index 2f4e1813be0..89ec6cca56e 100644 --- a/xlators/cluster/dht/src/dht-linkfile.c +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -11,7 +11,7 @@ #include <glusterfs/compat.h> #include "dht-common.h" -int +static int dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, @@ -34,17 +34,16 @@ dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name); if (!is_linkfile) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR, - "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path, - gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR, + "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s", + gfid, NULL); out: local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode, stbuf, postparent, postparent, xattr); return 0; } -#define is_equal(a, b) ((a) == (b)) -int +static int dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, @@ -73,9 +72,8 @@ dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, goto out; ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value. key : %s", - conf->link_xattr_name); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "mame=%s", conf->link_xattr_name, NULL); goto out; } @@ -125,27 +123,23 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true); if (ret) - gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: " - "key = gfid-req, gfid = %s ", - loc->path, gfid); + gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "gfid=%s", gfid, NULL); } else { gf_uuid_unparse(loc->gfid, gfid); } ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); if (ret) - gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", - loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid); + gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, + "gfid=%s", gfid, NULL); ret = dict_set_str(dict, conf->link_xattr_name, tovol->name); if (ret < 0) { - gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED, - "%s: failed to initialize linkfile data, gfid = %s", loc->path, - gfid); + gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED, + "path=%s", loc->path, "gfid=%s", gfid, NULL); goto out; } @@ -186,10 +180,9 @@ dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) { gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED, - "Unlinking linkfile %s (gfid = %s)on " - "subvolume %s failed ", - local->loc.path, gfid, subvol->name); + gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED, + "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s", + subvol->name, NULL); } DHT_STACK_DESTROY(frame); @@ -257,7 +250,7 @@ out: 
return subvol; } -int +static int dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *statpre, struct iatt *statpost, dict_t *xdata) @@ -269,10 +262,9 @@ dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, loc = &local->loc; if (op_ret) - gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED, - "Failed to set attr uid/gid on %s" - " :<gfid:%s> ", - (loc->path ? loc->path : "NULL"), uuid_utoa(local->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED, + "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s", + uuid_utoa(local->gfid), NULL); DHT_STACK_DESTROY(frame); diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c index f9bac4f97c8..638821ccee5 100644 --- a/xlators/cluster/dht/src/dht-lock.c +++ b/xlators/cluster/dht/src/dht-lock.c @@ -44,7 +44,8 @@ dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array, if (!lk_buf) goto out; - gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf); + gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i, + "lk_buf=%s", lk_buf, NULL); GF_FREE(lk_buf); } @@ -313,11 +314,9 @@ dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this, gfid); if (op_ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "unlock failed on gfid: %s, stale lock might be left " - "in DHT_LAYOUT_HEAL_DOMAIN", - gfid); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid, + "DHT_LAYOUT_HEAL_DOMAIN", NULL); } DHT_STACK_DESTROY(frame); @@ -339,9 +338,10 @@ dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid); if (op_ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, - "unlocking failed on %s:%s", - local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, + "name=%s", + local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, + "gfid=%s", gfid, NULL); } else { local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0; } @@ -375,9 +375,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, lock_frame = dht_lock_frame(frame); if (lock_frame == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, - "cannot allocate a frame, not unlocking following " - "entrylks:"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS, + NULL); dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); goto done; @@ -385,9 +385,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk); if (ret < 0) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, - "storing locks in local failed, not unlocking " - "following entrylks:"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK, + NULL); dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); @@ -446,21 +446,17 @@ dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk) lock_frame = copy_frame(frame); if (lock_frame == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - 
"mkdir (%s/%s) (path: %s): " - "copy frame failed", - pgfid, local->loc.name, local->loc.path); + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s", + local->loc.name, "path=%s", local->loc.path, NULL); goto done; } lock_local = dht_local_init(lock_frame, NULL, NULL, 0); if (lock_local == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "local creation failed", - pgfid, local->loc.name, local->loc.path); + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s", + local->loc.name, "path=%s", local->loc.path, NULL); goto done; } @@ -700,9 +696,10 @@ dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid, gfid); - gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, - "unlocking failed on %s:%s", - local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED, + "name=%s", + local->lock[0].layout.my_layout.locks[lk_index]->xl->name, + "gfid=%s", gfid, NULL); } else { local->lock[0].layout.my_layout.locks[lk_index]->locked = 0; } @@ -727,11 +724,9 @@ dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this, gfid); if (op_ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "unlock failed on gfid: %s, stale lock might be left " - "in DHT_LAYOUT_HEAL_DOMAIN", - gfid); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s", + gfid, NULL); } DHT_STACK_DESTROY(frame); @@ -762,9 +757,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, lock_frame = dht_lock_frame(frame); if (lock_frame == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, - "cannot allocate a frame, not unlocking following " - "locks:"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS, + NULL); dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); goto done; @@ -772,9 +767,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk); if (ret < 0) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED, - "storing locks in local failed, not unlocking " - "following locks:"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK, + NULL); dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count); @@ -834,21 +829,17 @@ dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk) lock_frame = copy_frame(frame); if (lock_frame == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "copy frame failed", - pgfid, local->loc.name, local->loc.path); + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s", + local->loc.name, "path=%s", local->loc.path, NULL); goto done; } lock_local = dht_local_init(lock_frame, NULL, NULL, 0); if (lock_local == NULL) { - gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, - DHT_MSG_PARENT_LAYOUT_CHANGED, - "mkdir (%s/%s) (path: %s): " - "local creation failed", - pgfid, 
local->loc.name, local->loc.path); + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s", + local->loc.name, "path=%s", local->loc.path, NULL); goto done; } @@ -1039,13 +1030,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, gfid); local->lock[0].layout.my_layout.op_ret = -1; local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s. gfid:%s", - local->lock[0] - .layout.my_layout.locks[lk_index] - ->xl->name, - gfid); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_INODELK_FAILED, "subvol=%s", + local->lock[0] + .layout.my_layout.locks[lk_index] + ->xl->name, + "gfid=%s", gfid, NULL); goto cleanup; } break; @@ -1060,13 +1050,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, gfid); local->lock[0].layout.my_layout.op_ret = -1; local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s. gfid:%s", - local->lock[0] - .layout.my_layout.locks[lk_index] - ->xl->name, - gfid); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_INODELK_FAILED, "subvol=%s", + local->lock[0] + .layout.my_layout.locks[lk_index] + ->xl->name, + "gfid=%s", gfid, NULL); goto cleanup; } break; @@ -1077,11 +1066,11 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, gfid); local->lock[0].layout.my_layout.op_ret = -1; local->lock[0].layout.my_layout.op_errno = op_errno; - gf_msg( + gf_smsg( this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED, - "inodelk failed on subvol %s, gfid:%s", + "subvol=%s", local->lock[0].layout.my_layout.locks[lk_index]->xl->name, - gfid); + "gfid=%s", gfid, NULL); goto cleanup; } } @@ -1153,19 +1142,16 @@ dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count, lock_frame = dht_lock_frame(frame); if (lock_frame == NULL) { gf_uuid_unparse(tmp_local->loc.gfid, gfid); - gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED, - "memory allocation failed for lock_frame. 
gfid:%s" - " path:%s", - gfid, tmp_local->loc.path); + gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED, + "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL); goto out; } ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk); if (ret < 0) { gf_uuid_unparse(tmp_local->loc.gfid, gfid); - gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED, - "dht_local_lock_init failed, gfid: %s path:%s", gfid, - tmp_local->loc.path); + gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED, + "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL); goto out; } @@ -1246,11 +1232,10 @@ dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie, if (ret < 0) { local->op_ret = -1; local->op_errno = EIO; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_ENTRYLK_ERROR, - "%s (%s/%s): " - "dht_blocking_entrylk failed after taking inodelk", - gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s", + gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s", + entrylk->locks[0]->basename, NULL); goto err; } @@ -1310,10 +1295,9 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, ret = dht_build_parent_loc(this, &parent, loc, &op_errno); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED, - "gfid:%s (name:%s) (path: %s): " - "parent loc build failed", - loc->gfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED, + "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s", + loc->path, NULL); goto out; } gf_uuid_unparse(parent.gfid, pgfid); @@ -1322,10 +1306,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); if (inodelk->locks == NULL) { local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "calloc failure", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop], + "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path, + NULL); goto out; } @@ -1334,10 +1318,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, FAIL_ON_ANY_ERROR); if (inodelk->locks[0] == NULL) { local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "inodelk: lock allocation failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s", + gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s", + loc->name, "path=%s", loc->path, NULL); goto err; } inodelk->lk_count = count; @@ -1346,10 +1330,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer); if (entrylk->locks == NULL) { local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "entrylk: calloc failure", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s", + gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s", + loc->name, "path=%s", loc->path, NULL); goto 
err; } @@ -1359,10 +1343,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, FAIL_ON_ANY_ERROR); if (entrylk->locks[0] == NULL) { local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY, - "%s (%s/%s) (path: %s): " - "entrylk: lock allocation failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s", + gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s", + loc->name, "path=%s", loc->path, NULL); goto err; } @@ -1376,11 +1360,11 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol, dht_blocking_entrylk_after_inodelk); if (ret < 0) { local->op_errno = EIO; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_INODELK_ERROR, - "%s (%s/%s) (path: %s): " - "dht_blocking_inodelk failed", - gf_fop_list[local->fop], pgfid, loc->name, loc->path); + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop], + "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path, + NULL); + goto err; } diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 33f9832395b..e3c4471334a 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -30,10 +30,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_container_t, gf_dht_mt_octx_t, gf_dht_mt_miginfo_t, - gf_tier_mt_bricklist_t, - gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, - gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, gf_dht_nodeuuids_t, gf_dht_mt_end diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index afc7460151b..601f8dad78b 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -38,12 +38,11 @@ GLFS_MSGID( DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED, DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES, DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED, - DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR, - DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED, - DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED, - DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED, - DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO, - DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, + DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, + DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED, + DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED, + DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, + DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR, DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED, DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED, DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED, @@ -69,8 +68,7 @@ GLFS_MSGID( DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED, DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR, DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO, - DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR, - DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED, + DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED, DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED, DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED, 
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED, @@ -79,6 +77,310 @@ GLFS_MSGID( DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED, DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED, DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN, - DHT_MSG_NON_HASHED_SUBVOL_DOWN); + DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED, + DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED, + DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED, + DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED, + DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS, + DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED, + DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST, + DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED, + DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE, + DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND, + DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED, + DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED, + DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED, + DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED, + DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED, + DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL, + DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED, + DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP, + DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID, + DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED, + DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED, + DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE, + DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED, + DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR, + DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR, + DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED, + DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED, + DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO, + DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR, + DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED, + DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED, + DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED, + DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED, + DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED, + DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED, + DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT, + DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED, + DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED, + DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED, + DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED, + DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL, + DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL, + DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED, + DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED, + DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED, + DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED, + DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, + DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED, + DHT_MSG_BLOCK_INODELK_FAILED, + DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK, + 
DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS, + DHT_MSG_DST_NULL_SET_FAILED); + +#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx" +#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx" +#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file" +#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file" +#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask" +#define DHT_MSG_ASPRINTF_FAILED_STR \ + "asprintf failed while fetching subvol from the id" +#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx" +#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file" +#define DHT_MSG_INVALID_LINKFILE_STR \ + "linkto target is different from cached-subvol. treating as destination " \ + "subvol" +#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file" +#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr" +#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file" +#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed" +#define DHT_MSG_DIR_HEAL_ABORT_STR \ + "Failed to get path from subvol. Aborting directory healing" +#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory" +#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \ + "Found a NULL inode. Failed to unref the inode" +#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value" +#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile" +#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data" +#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed" +#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed" +#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict" +#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol" +#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol" +#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped" +#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed" +#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed" +#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open" +#define DHT_MSG_CREATE_FAILED_STR "failed to create" +#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist" +#define DHT_MSG_CHOWN_FAILED_STR "chown failed" +#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed" +#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed" +#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs" +#define DHT_MSG_WRITE_CROSS_STR \ + "write will cross min-free-disk for file on subvol. looking for new subvol" +#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \ + "Could not find any subvol with space accommodating the file. Consider 
Cosider " \ + "adding bricks" +#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file" +#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory" +#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr" +#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode" +#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination" +#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile" +#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed" +#define DHT_MSG_MKNOD_FAILED_STR "mknod failed" +#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr" +#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \ + "Migrate file cleanup failed: failed to fstat file" +#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file" +#define DHT_MSG_PARENT_BUILD_FAILED_STR \ + "failed to build parent loc, which is needed to acquire entrylk to " \ + "synchronize with renames on this path. Skipping migration" +#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \ + "cannot find hashed subvol which is needed to synchronize with renames " \ + "on this path. Skipping migration" +#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol" +#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file" +#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration" +#define DHT_MSG_CHANGED_DST_STR "destination changed fo file" +#define DHT_MSG_TRACE_FAILED_STR "Trace failed" +#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed" +#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file" +#define DHT_MSG_STAT_FAILED_STR "failed to do a stat" +#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink" +#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration" +#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file" +#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file" +#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed" +#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket" +#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume" +#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume" +#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed" +#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL" +#define DHT_MSG_DATA_MIGRATE_ABORT_STR \ + "Readdirp failed. Aborting data migration for dict" +#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed" +#define DHT_MSG_PARENT_NULL_STR "parent is NULL" +#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present" +#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed" +#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup" +#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed" +#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping" +#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout" +#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed" +#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed" +#define DHT_MSG_FIX_NOT_COMP_STR \ + "Unable to retrieve fixlayout xattr. Assume background fix layout not " \ + "complete" +#define DHT_MSG_SUBVOL_DETER_FAILED_STR \ + "local subvolume determination failed with error" +#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol" +#define DHT_MSG_NODE_UUID_STR "node uuid" +#define DHT_MSG_SIZE_FILE_STR "Total size files" +#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \ + "Failed to get the total data size. 
Unable to estimate time to complete " \ + "rebalance" +#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \ + "file_counter_thread: pthread_join failed" +#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \ + "Failed to create the file counter thread" +#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \ + "Failed to initialise migration queue" +#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance" +#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout" +#define DHT_MSG_WOKE_STR "woken" +#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance" +#define DHT_MSG_REBALANCE_START_FAILED_STR \ + "Failed to start rebalance: look up on / failed" +#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \ + "Could not create task for rebalance" +#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \ + "Rebalance estimates will not be available" +#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status" +#define DHT_MSG_DATA_NULL_STR "data value is NULL" +#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error adding choices in buffer" +#define DHT_MSG_GET_CHOICES_ERROR_STR "Error getting choices" +#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error preparing status" +#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice" +#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \ + "Failed to aggregate quota xattr" +#define DHT_MSG_FILE_TYPE_MISMATCH_STR \ + "path exists as a file on one subvolume and directory on another. Please " \ + "fix it manually" +#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume" +#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume" +#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume" +#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \ + "No gfid exists for path, so healing xattr is not possible" +#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1" +#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts" +#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink returned" +#define DHT_MSG_LINKTO_FILE_FAILED_STR \ + "Could not unlink the linkto file as either fd is open and/or linkto " \ + "xattr is set" +#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \ + "Could not set pre-set layout for subvolume" +#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \ + "multiple subvolumes have file (preferably rename the file in the " \ + "backend, and do a fresh lookup)" +#define DHT_MSG_STALE_LINKFILE_DELETE_STR \ + "attempting deletion of stale linkfile" +#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile" +#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto" +#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1" +#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel" +#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get" +#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid" +#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir" +#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed" +#define DHT_MSG_UPGRADE_BRICKS_STR \ + "At least one of the bricks does not support this operation. Please 
Please " \ + "upgrade all bricks" +#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename" +#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL" +#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key" +#define DHT_MSG_MDS_DETER_FAILED_STR \ + "Cannot determine MDS, fetching xattr randomly from a subvol" +#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \ + "MDS is down for path, so fetching xattr randomly from subvol" +#define DHT_MSG_CREATE_REBAL_FAILED_STR \ + "failed to create a new rebalance synctask" +#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout" +#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value" +#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode" +#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path" +#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file" +#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed" +#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \ + "extracting in-memory layout of parent failed" +#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \ + "setting in params dictionary failed" +#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed" +#define DHT_MSG_LOC_FAILED_STR "parent loc build failed" +#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed" +#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed" +#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \ + "mkdir loop detected. parent layout didn't change even though previous " \ + "attempt of mkdir failed because of in-memory layout not matching with " \ + "that on disk." +#define DHT_MSG_REFRESH_ATTEMPT_STR \ + "mkdir parent layout changed. Attempting a refresh and then a retry" +#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \ + "Acquiring lock on parent to guard against layout-change failed" +#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed" +#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \ + "cannot wind lock request to guard parent layout" +#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed." +#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol" +#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume" +#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key" +#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. 
Exiting" +#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed" +#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set" +#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol" +#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed" +#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume" +#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed" +#define DHT_MSG_INVALID_DISK_LAYOUT_STR \ + "Invalid disk layout: Catastrophic error layout with unknown type found" +#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed" +#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies" +#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL" +#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing" +#define DHT_MSG_LAYOUT_INFO_STR "layout info" +#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol" +#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed" +#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed" +#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout" +#define DHT_MSG_DICT_IS_NULL_STR \ + "dict is NULL, need to make sure gfids are same" +#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed" +#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats" +#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed" +#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed" +#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol" +#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs" +#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr" +#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \ + "mds subvol is down, unable to set xattr" +#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \ + "Directory attr heal failed. 
Failed to set uid/gid" +#define DHT_MSG_WIND_UNLOCK_FAILED_STR \ + "Winding unlock failed: stale locks left on brick" +#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed" +#define DHT_MSG_LK_ARRAY_INFO_STR "lk info" +#define DHT_MSG_UNLOCK_GFID_FAILED_STR \ + "unlock failed on gfid: stale lock might be left" +#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed" +#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks" +#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed" +#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks" +#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol" +#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame" +#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed" +#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \ + "dht_blocking_entrylk failed after taking inodelk" +#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed" +#define DHT_MSG_CALLOC_FAILED_STR "calloc failed" +#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed" +#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \ + "cannot allocate a frame, not unlocking following entrylks" +#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \ + "storing locks in local failed, not unlocking following entrylks" +#define DHT_MSG_DST_NULL_SET_FAILED_STR \ + "src or dst is NULL, Failed to set dictionary value" #endif /* _DHT_MESSAGES_H_ */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 0d68ed53e08..8ba8082bd86 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -16,8 +16,8 @@ #include "glusterfs/compat-errno.h" // for ENODATA on BSD #define GF_DISK_SECTOR_SIZE 512 -#define DHT_REBALANCE_PID 4242 /* Change it if required */ -#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ +#define DHT_REBALANCE_PID 4242 /* Change it if required */ +#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */ #define MAX_MIGRATE_QUEUE_COUNT 500 #define MIN_MIGRATE_QUEUE_COUNT 200 #define MAX_REBAL_TYPE_SIZE 16 @@ -45,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) if (meta) { for (i = 0; i < local_subvols_cnt; i++) { - gf_dirent_free(&meta->equeue[i]); + if (meta->equeue) + gf_dirent_free(&meta->equeue[i]); + if (meta->lfd && meta->lfd[i]) + fd_unref(meta->lfd[i]); } GF_FREE(meta->equeue); @@ -53,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) GF_FREE(meta->iterator); GF_FREE(meta->offset_var); GF_FREE(meta->fetch_entries); + GF_FREE(meta->lfd); GF_FREE(meta); } } @@ -84,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret) return; } -static gf_boolean_t -dht_is_tier_command(int cmd) -{ - gf_boolean_t is_tier = _gf_false; - - switch (cmd) { - case GF_DEFRAG_CMD_START_TIER: - case GF_DEFRAG_CMD_STATUS_TIER: - case GF_DEFRAG_CMD_START_DETACH_TIER: - case GF_DEFRAG_CMD_STOP_DETACH_TIER: - case GF_DEFRAG_CMD_PAUSE_TIER: - case GF_DEFRAG_CMD_RESUME_TIER: - is_tier = _gf_true; - break; - default: - break; - } - return is_tier; -} - static int dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) { @@ -112,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) char *tmpstr = NULL; char *ptr = NULL; char *suffix = "-dht"; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; int len = 0; eventtypes_t event = 
EVENT_LAST; @@ -132,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) break; } - if (dht_is_tier_command(cmd)) { - /* We should have the tier volume name*/ - conf = this->private; - defrag = conf->defrag; - volname = defrag->tier_conf.volname; - } else { - /* DHT volume */ - len = strlen(this->name) - strlen(suffix); - tmpstr = gf_strdup(this->name); - if (tmpstr) { - ptr = tmpstr + len; - if (!strcmp(ptr, suffix)) { - tmpstr[len] = '\0'; - volname = tmpstr; - } + /* DHT volume */ + len = strlen(this->name) - strlen(suffix); + tmpstr = gf_strdup(this->name); + if (tmpstr) { + ptr = tmpstr + len; + if (!strcmp(ptr, suffix)) { + tmpstr[len] = '\0'; + volname = tmpstr; } } @@ -172,75 +147,6 @@ dht_strip_out_acls(dict_t *dict) } } -static int -dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, - int32_t size, off_t offset, struct iobref *iobref, - int *fop_errno) -{ - int i = 0; - int ret = -1; - int start_idx = 0; - int tmp_offset = 0; - int write_needed = 0; - int buf_len = 0; - int size_pending = 0; - char *buf = NULL; - - /* loop through each vector */ - for (i = 0; i < count; i++) { - buf = vec[i].iov_base; - buf_len = vec[i].iov_len; - - for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; - start_idx += GF_DISK_SECTOR_SIZE) { - if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { - write_needed = 1; - continue; - } - - if (write_needed) { - ret = syncop_write( - to, fd, (buf + tmp_offset), (start_idx - tmp_offset), - (offset + tmp_offset), iobref, 0, NULL, NULL); - /* 'path' will be logged in calling function */ - if (ret < 0) { - gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror(-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - - write_needed = 0; - } - tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; - } - - if ((start_idx < buf_len) || write_needed) { - /* This means, last chunk is not yet written.. 
write it */ - ret = syncop_write(to, fd, (buf + tmp_offset), - (buf_len - tmp_offset), (offset + tmp_offset), - iobref, 0, NULL, NULL); - if (ret < 0) { - /* 'path' will be logged in calling function */ - gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror(-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - } - - size_pending = (size - buf_len); - if (!size_pending) - break; - } - - ret = size; -out: - return ret; -} - /* return values: -1 : failure @@ -648,7 +554,7 @@ out: static int __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, - int *fop_errno) + int *fop_errno, int file_has_holes) { int ret = -1; int ret2 = -1; @@ -703,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - if (!!dht_is_tier_xlator(this)) { - xdata = dict_new(); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", - loc->path); - goto out; - } + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed", loc->path); + goto out; + } - ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", loc->path, - GF_CLEAN_WRITE_PROTECTION); - goto out; - } + ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; } ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); @@ -817,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, /* No need to bother about 0 byte size files */ if (stbuf->ia_size > 0) { - if (conf->use_fallocate) { + if (conf->use_fallocate && !file_has_holes) { ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL); if (ret < 0) { if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) { @@ -844,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, goto out; } } - } - - if (!conf->use_fallocate) { + } else { ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL, NULL); if (ret < 0) { @@ -1097,22 +998,90 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, int ret = 0; int count = 0; off_t offset = 0; + off_t data_offset = 0; + off_t hole_offset = 0; struct iovec *vector = NULL; struct iobref *iobref = NULL; uint64_t total = 0; size_t read_size = 0; + size_t data_block_size = 0; dict_t *xdata = NULL; dht_conf_t *conf = NULL; conf = this->private; + /* if file size is '0', no need to enter this loop */ while (total < ia_size) { + /* This is a regular file - read it sequentially */ + if (!hole_exists) { + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) + ? 
DHT_REBALANCE_BLKSIZE + : (ia_size - total)); + } else { + /* This is a sparse file - read only the data segments in the file + */ + + /* If the previous data block is fully copied, find the next data + * segment + * starting at the offset of the last read and written byte, */ + if (data_block_size <= 0) { + ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL, + &data_offset); + if (ret) { + if (ret == -ENXIO) + ret = 0; /* No more data segments */ + else + *fop_errno = -ret; /* Error occurred */ + + break; + } + + /* If the position of the current data segment is greater than + * the position of the next hole, find the next hole in order to + * calculate the length of the new data segment */ + if (data_offset > hole_offset) { + /* Starting at the offset of the last data segment, find the + * next hole */ + ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE, + NULL, &hole_offset); + if (ret) { + /* If an error occurred here it's a real error because + * if the seek for a data segment was successful then + * necessarily another hole must exist (EOF is a hole) + */ + *fop_errno = -ret; + break; + } + + /* Calculate the total size of the current data block */ + data_block_size = hole_offset - data_offset; + } + } else { + /* There is still data in the current segment, move the + * data_offset to the position of the last written byte */ + data_offset = offset; + } + + /* Calculate how much data needs to be read and written. If the data + * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and + * write DHT_REBALANCE_BLKSIZE data length and the rest in the + * next iteration(s) */ + read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) + ? DHT_REBALANCE_BLKSIZE + : data_block_size); + + /* Calculate the remaining size of the data block - maybe there's no + * need to seek for data in the next iteration */ + data_block_size -= read_size; + + /* Set offset to the offset of the data segment so read and write + * will have the correct position */ + offset = data_offset; + } ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, &iobref, NULL, NULL, NULL); + if (!ret || (ret < 0)) { if (!ret) { /* File was probably truncated*/ @@ -1124,57 +1093,42 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, break; } - if (hole_exists) { - ret = dht_write_with_holes(to, dst, vector, count, ret, offset, - iobref, fop_errno); - } else { - if (!conf->force_migration && !dht_is_tier_xlator(this)) { + if (!conf->force_migration) { + if (!xdata) { + xdata = dict_new(); if (!xdata) { - xdata = dict_new(); - if (!xdata) { - gf_msg("dht", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "insufficient memory"); - ret = -1; - *fop_errno = ENOMEM; - break; - } + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "insufficient memory"); + ret = -1; + *fop_errno = ENOMEM; + break; + } - /* Fail this write and abort rebalance if we - * detect a write from client since migration of - * this file started. This is done to avoid - * potential data corruption due to out of order - * writes from rebalance and client to the same - * region (as compared between src and dst - * files). See - * https://github.com/gluster/glusterfs/issues/308 - * for more details. - */ - ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); - if (ret) { - gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, - "failed to set dict"); - ret = -1; - *fop_errno = ENOMEM; - break; - } + /* Fail this write and abort rebalance if we + * detect a write from client since migration of + * this file started. 
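
This rewritten loop is the heart of the change: instead of reading the whole file and scanning for zero sectors at write time, it asks the filesystem where the data is. syncop_seek with GF_SEEK_DATA/GF_SEEK_HOLE mirrors lseek(2)'s SEEK_DATA/SEEK_HOLE semantics, where seeking for data past the last extent fails with ENXIO and EOF always terminates a data segment as an implicit hole. The same skeleton in plain POSIX:

    #define _GNU_SOURCE
    #include <unistd.h>
    #include <errno.h>

    /* Assumed helper: copies `len` bytes at `off` from src to dst. */
    static int copy_range(int src, int dst, off_t off, off_t len);

    static int
    copy_only_data(int src, int dst)
    {
        off_t data, hole = 0;

        for (;;) {
            data = lseek(src, hole, SEEK_DATA);
            if (data < 0)
                return errno == ENXIO ? 0 : -errno; /* ENXIO: done */

            /* A data segment is always closed by a hole (EOF counts),
             * so a failure here is a real error. */
            hole = lseek(src, data, SEEK_HOLE);
            if (hole < 0)
                return -errno;

            if (copy_range(src, dst, data, hole - data) < 0)
                return -1;
        }
    }
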
This is done to avoid + * potential data corruption due to out of order + * writes from rebalance and client to the same + * region (as compared between src and dst + * files). See + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ + ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); + ret = -1; + *fop_errno = ENOMEM; + break; } } - ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, - NULL, xdata, NULL); - if (ret < 0) { - *fop_errno = -ret; - } - } - - if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && - (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { - gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, - "Migrate file paused"); - ret = -1; } + ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, + NULL, xdata, NULL); if (ret < 0) { + *fop_errno = -ret; break; } @@ -1568,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, xlator_t *old_target = NULL; xlator_t *hashed_subvol = NULL; fd_t *linkto_fd = NULL; + dict_t *xdata = NULL; if (from == to) { gf_msg_debug(this->name, 0, @@ -1578,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* If defrag is NULL, it should be assumed that migration is triggered - * from client using the trusted.distribute.migrate-data virtual xattr - */ - defrag = conf->defrag; - - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator(this)) { - ret = ENOTSUP; - goto out; - } - - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; - gf_log(this->name, log_level, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -1739,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* Try to preserve 'holes' while migrating data */ + if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) + file_has_holes = 1; + /* create the destination, with required modes/xattr */ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, - fop_errno); + fop_errno, file_has_holes); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" @@ -1785,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, * destination. 
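
The GF_AVOID_OVERWRITE block retained above is worth pausing on: every rebalance write carries this flag in xdata so the brick can fail it if a client has written to the file since migration began, preventing out-of-order writes from rebalance and the client from corrupting the same region (gluster issue #308). The dict is created on the first iteration and reused for every subsequent chunk. Condensed from the loop in this hunk (a fragment, not a standalone program):

    /* Built lazily once, then reused for every chunk of the file. */
    if (!xdata) {
        xdata = dict_new();
        if (!xdata)
            return -ENOMEM;
        if (dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1)) {
            dict_unref(xdata);
            return -ENOMEM;
        }
    }

    /* The brick rejects this write if an external client wrote to the
     * file after migration started. */
    ret = syncop_writev(to, dst, vector, count, offset, iobref, 0,
                        NULL, NULL, xdata, NULL);
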
We need to do update this only post migration * as in case of failure the linkto needs to point to the source * subvol */ - ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, - &dst_fd, fop_errno); + ret = __dht_rebalance_create_dst_file( + this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes); if (ret) { gf_log(this->name, GF_LOG_ERROR, "Create dst failed" @@ -1873,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = 0; goto out; } - /* Try to preserve 'holes' while migrating data */ - if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) - file_has_holes = 1; ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, stbuf.ia_size, file_has_holes, @@ -1890,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* TODO: Sync the locks */ - ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); + xdata = dict_new(); + if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set last-fsync flag on " + "%s (%s)", + loc->path, to->name, strerror(ENOMEM)); + } + + ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); if (ret) { gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", loc->path, to->name, strerror(-ret)); @@ -2333,14 +2282,12 @@ out: } } - if (!dht_is_tier_xlator(this)) { - lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, + NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } if (dict) @@ -2353,11 +2300,15 @@ out: if (dst_fd) syncop_close(dst_fd); + if (src_fd) syncop_close(src_fd); if (linkto_fd) syncop_close(linkto_fd); + if (xdata) + dict_unref(xdata); + loc_wipe(&tmp_loc); loc_wipe(&parent_loc); @@ -2587,10 +2538,10 @@ out: * all hardlinks. */ -int +gf_boolean_t gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) { - int ret = 0; + gf_boolean_t ret = _gf_false; int i = local_subvol_index; char *str = NULL; uint32_t hashval = 0; @@ -2612,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) } str = uuid_utoa_r(gfid, buf); - ret = dht_hash_compute(this, 0, str, &hashval); - if (ret == 0) { + if (dht_hash_compute(this, 0, str, &hashval) == 0) { index = (hashval % entry->count); if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } @@ -2630,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) /* None of the bricks in the subvol are up. 
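
Also in this hunk, the hole check moves up so it runs before the destination file is created; it has to, since __dht_rebalance_create_dst_file now needs file_has_holes to choose between fallocate and ftruncate (the old placement, removed further down, ran only after the destination already existed). The test itself is the usual POSIX sparseness heuristic, with ia_size/ia_blocks playing the role of st_size/st_blocks:

    #include <sys/stat.h>

    /* st_blocks counts 512-byte units. A file whose logical size
     * exceeds its allocated bytes must contain holes; metadata blocks
     * only ever push the allocation up, so this rarely misfires. */
    static int
    is_sparse(const struct stat *st)
    {
        return st->st_size > (off_t)st->st_blocks * 512;
    }
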
* CHILD_DOWN will kill the process soon */ - return 0; + return _gf_false; } if (entry->elements[index].info == REBAL_NODEUUID_MINE) { /* Index matches this node's nodeuuid.*/ - ret = 1; + ret = _gf_true; goto out; } } @@ -2684,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque) struct iatt *iatt_ptr = NULL; gf_boolean_t update_skippedcount = _gf_true; int i = 0; + gf_boolean_t should_i_migrate = 0; rebal_entry = (struct dht_container *)opaque; if (!rebal_entry) { @@ -2738,11 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } + should_i_migrate = gf_defrag_should_i_migrate( + this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid); + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); gf_uuid_copy(entry_loc.pargfid, loc->gfid); ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + + if (!should_i_migrate) { + /* this node isn't supposed to migrate the file. suppressing any + * potential error from lookup as this file is under migration by + * another node */ + if (ret) { + gf_msg_debug(this->name, -ret, + "Ignoring lookup failure: node isn't migrating %s", + entry_loc.path); + ret = 0; + } + gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); + goto out; + } + if (ret) { gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, "Migrate file failed: %s lookup failed", entry_loc.path); @@ -2763,12 +2732,6 @@ gf_defrag_migrate_single_file(void *opaque) goto out; } - if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, - entry->d_stat.ia_gfid)) { - gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); - goto out; - } - iatt_ptr = &iatt; hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); @@ -2911,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque) if (defrag->stats == _gf_true) { gettimeofday(&end, NULL); - elapsed = (end.tv_sec - start.tv_sec) * 1e6 + - (end.tv_usec - start.tv_usec); + elapsed = gf_tvdiff(&start, &end); gf_log(this->name, GF_LOG_INFO, "Migration of " "file:%s size:%" PRIu64 @@ -3091,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i, dht_conf_t *conf, gf_defrag_info_t *defrag, fd_t *fd, dict_t *migrate_data, struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) + int *perrno) { - int ret = -1; + int ret = 0; char is_linkfile = 0; gf_dirent_t *df_entry = NULL; struct dht_container *tmp_container = NULL; @@ -3109,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i, } if (dir_dfmeta->fetch_entries[i] == 1) { + if (!fd) { + dir_dfmeta->fetch_entries[i] = 0; + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; + } + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, dir_dfmeta->offset_var[i].offset, &(dir_dfmeta->equeue[i]), xattr_req, NULL); @@ -3268,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *migrate_data, int *perrno) { int ret = -1; - fd_t *fd = NULL; dht_conf_t *conf = NULL; gf_dirent_t entries; dict_t *xattr_req = NULL; @@ -3289,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int dfc_index = 0; int throttle_up = 0; struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; + xlator_t *old_THIS = NULL; gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); gettimeofday(&dir_start, NULL); @@ -3302,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - fd = fd_create(loc->inode, defrag->pid); - if (!fd) { - 
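
gf_defrag_should_i_migrate (now returning gf_boolean_t, which reads better than the old int) is how concurrent rebalance processes on different nodes divide work without talking to each other: each hashes the file's gfid, indexes into the subvolume's nodeuuid list, which is identical and identically ordered on every node, and claims the file only if the slot holds its own uuid, advancing past slots whose bricks are down. A simplified, self-contained sketch of the idea:

    #include <stdint.h>
    #include <string.h>

    typedef unsigned char gfid_t[16];

    struct node_slot {
        unsigned char uuid[16];
        int up;
    };

    /* Stand-in for dht_hash_compute over the gfid string. */
    extern uint32_t gfid_hash(const gfid_t gfid);

    static int
    should_i_migrate(const unsigned char my_uuid[16], const gfid_t gfid,
                     const struct node_slot *slots, int count)
    {
        int idx = gfid_hash(gfid) % count;
        int tried;

        /* Every node skips the same down slots, so exactly one node
         * still claims each file. */
        for (tried = 0; tried < count; tried++) {
            if (slots[idx].up)
                return memcmp(slots[idx].uuid, my_uuid, 16) == 0;
            idx = (idx + 1) % count;
        }
        return 0; /* no brick up; CHILD_DOWN will stop the process */
    }
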
gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + old_THIS = THIS; + THIS = this; + + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); + if (!dir_dfmeta) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); ret = -1; goto out; } - ret = syncop_opendir(this, loc, fd, NULL, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, - "Migrate data failed: Failed to open dir %s", loc->path); - *perrno = -ret; + dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *), + gf_common_mt_pointer); + if (!dir_dfmeta->lfd) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY, + "for dir_dfmeta", NULL); ret = -1; + *perrno = ENOMEM; goto out; } - fd_bind(fd); - dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); - if (!dir_dfmeta) { - gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); - ret = -1; - goto out; + for (i = 0; i < local_subvols_cnt; i++) { + dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid); + if (!dir_dfmeta->lfd[i]) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED, + NULL); + *perrno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i], + NULL, NULL); + if (ret) { + fd_unref(dir_dfmeta->lfd[i]); + dir_dfmeta->lfd[i] = NULL; + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN, + "dir: %s", loc->path, "subvol: %s", + conf->local_subvols[i]->name, NULL); + + if (conf->decommission_in_progress) { + *perrno = -ret; + ret = -1; + goto out; + } + } else { + fd_bind(dir_dfmeta->lfd[i]); + } } dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)), @@ -3358,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } + ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this); if (ret) { gf_log(this->name, GF_LOG_ERROR, @@ -3370,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int), gf_common_mt_int); if (!dir_dfmeta->fetch_entries) { - gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL"); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY, + "for dir_dfmeta->fetch_entries", NULL); ret = -1; goto out; } @@ -3440,8 +3435,13 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, - defrag, fd, migrate_data, dir_dfmeta, - xattr_req, &should_commit_hash, perrno); + defrag, dir_dfmeta->lfd[dfc_index], + migrate_data, dir_dfmeta, xattr_req, + perrno); + + if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { + goto out; + } if (ret) { gf_log(this->name, GF_LOG_WARNING, @@ -3481,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } gettimeofday(&end, NULL); - elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + - (end.tv_usec - dir_start.tv_usec); + elapsed = gf_tvdiff(&dir_start, &end); gf_log(this->name, GF_LOG_INFO, "Migration operation on dir %s took " "%.2f secs", loc->path, elapsed / 1e6); ret = 0; out: - + THIS = old_THIS; gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); if (xattr_req) dict_unref(xattr_req); - if (fd) - fd_unref(fd); - - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - /* It does not matter if it errored out - 
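
Both stopwatch computations in these hunks now go through gf_tvdiff instead of open-coded arithmetic. Judging from the call sites, where the result is still divided by 1e6 before being printed as seconds, it returns the elapsed time in microseconds, i.e. something equivalent to:

    #include <sys/time.h>

    /* Microseconds from *start to *end; assumes end >= start. */
    static double
    tvdiff(const struct timeval *start, const struct timeval *end)
    {
        return (end->tv_sec - start->tv_sec) * 1e6 +
               (end->tv_usec - start->tv_usec);
    }
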
this number is * used to calculate rebalance estimated time to complete. * No locking required as dirs are processed by a single thread. @@ -3509,6 +3501,7 @@ out: defrag->num_dirs_processed++; return ret; } + int gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout) @@ -3523,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, * rebalance is complete. */ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || - defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { return 0; } @@ -3569,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, return 0; } -/* Function for doing a named lookup on file inodes during an attach tier - * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal - * happens on pre-existing data. This is required so that the ctr database has - * hardlinks of all the exisitng file in the volume. CTR xlator on the - * brick/server side does db update/insert of the hardlink on a namelookup. - * Currently the namedlookup is done synchronous to the fixlayout that is - * triggered by attach tier. This is not performant, adding more time to - * fixlayout. The performant approach is record the hardlinks on a compressed - * datastore and then do the namelookup asynchronously later, giving the ctr db - * eventual consistency - * */ -int -gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, - gf_dirent_t *file_dentry) -{ - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - - GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - - GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); - goto out; - } - - conf = this->private; - - loc_wipe(&file_loc); - - if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null(parent_loc->gfid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", - parent_loc->path, file_dentry->d_name); - goto out; - } - - gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - - ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new(); - if (!lookup_xdata) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", file_dentry->d_name); - goto out; - } - - ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; - } - - gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, - lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. 
*/ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; - -out: - - loc_wipe(&file_loc); - - if (lookup_xdata) - dict_unref(lookup_xdata); - - return ret; -} - int gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -3696,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, }; inode_t *linked_inode = NULL, *inode = NULL; dht_conf_t *conf = NULL; - int should_commit_hash = 1; int perrno = 0; conf = this->private; @@ -3799,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; if (!IA_ISDIR(entry->d_stat.ia_type)) { - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. - * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup(this, loc, entry); - } - continue; } loc_wipe(&entry_loc); @@ -3825,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } else { - should_commit_hash = 0; - continue; } } @@ -3889,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = -1; goto out; } else { - should_commit_hash = 0; continue; } } @@ -3902,7 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, migrate_data); - if (ret && ret != 2) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED || + defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) { + goto out; + } + + if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, "Fix layout failed for %s", entry_loc.path); @@ -3933,6 +3808,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, */ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + + /* In case of a race where the directory is deleted just before + * layout setxattr, the errors are updated in the layout structure. + * We can use this information to make a decision whether the directory + * is deleted entirely. 
+ */ + if (ret == 0) { + ret = dht_dir_layout_error_check(this, loc->inode); + ret = -ret; + } + if (ret) { if (-ret == ENOENT || -ret == ESTALE) { gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, @@ -3943,6 +3829,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->total_failures++; } ret = 0; + goto out; } else { gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, "Setxattr failed for %s", loc->path); @@ -3957,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, } } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - if (ret && (ret != 2)) { + if (ret) { if (perrno == ENOENT || perrno == ESTALE) { ret = 0; goto out; @@ -3977,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (conf->decommission_in_progress) { goto out; } - - should_commit_hash = 0; } - } else if (ret == 2) { - should_commit_hash = 0; } } gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - if (should_commit_hash && - gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { defrag->total_failures++; gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, @@ -4012,245 +3893,34 @@ out: if (fd) fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - - return ret; -} - -/****************************************************************************** - * Tier background Fix layout functions - ******************************************************************************/ -/* This is the background tier fixlayout thread */ -void * -gf_tier_do_fix_layout(void *args) -{ - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; - int ret = -1; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *dict = NULL; - loc_t loc = { - 0, - }; - struct iatt iatt = { - 0, - }; - struct iatt parent = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out); - GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out); - this = tier_fix_layout_arg->this; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out); - - GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out); - - /* Get Root loc_t */ - dht_build_root_loc(defrag->root_inode, &loc); - ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, - "Lookup on root failed."); - ret = -1; - goto out; - } - - /* Start the crawl */ - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Tiering Fixlayout started"); - - ret = gf_defrag_fix_layout(this, defrag, &loc, - tier_fix_layout_arg->fix_layout, NULL); - if (ret && ret != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Tiering fixlayout failed."); - ret = -1; - goto out; - } - - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, - tier_fix_layout_arg->fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - dict = dict_new(); - if (!dict) { - ret = -1; - goto out; - } - - ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); - if 
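
The race comment above is subtle: syncop_setxattr on the layout can report overall success even though individual subvolumes failed because the directory was deleted underneath the crawl, and those per-subvolume errors are recorded in the inode's layout rather than in the return value. dht_dir_layout_error_check is presumably what surfaces them; a sketch under that assumption, with hypothetical field names:

    /* Hypothetical shapes, for illustration only. */
    struct layout_entry { int err; /* errno recorded per subvolume */ };
    struct layout { int cnt; struct layout_entry *list; };

    /* First recorded error, or 0 for a clean layout. A result of
     * ENOENT/ESTALE tells the caller the directory vanished mid-heal
     * and can be skipped rather than counted as a failure. */
    static int
    layout_error_check(const struct layout *layout)
    {
        int i;

        for (i = 0; i < layout->cnt; i++) {
            if (layout->list[i].err)
                return layout->list[i].err;
        }
        return 0;
    }
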
(ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Failed to set dictionary value: key = %s", - GF_XATTR_TIER_LAYOUT_FIXED_KEY); - ret = -1; - goto out; - } - - /* Marking the completion of tiering fix layout via a xattr on root */ - ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, - "Failed to set tiering fix " - "layout completed xattr on %s", - loc.path); - ret = -1; - goto out; - } - - ret = 0; -out: - if (ret && defrag) - defrag->total_failures++; - - if (dict) - dict_unref(dict); - - return NULL; -} - -int -gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag, - dict_t *fix_layout) -{ - int ret = -1; - dict_t *tier_dict = NULL; - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; - - tier_dict = dict_new(); - if (!tier_dict) { - gf_log("tier", GF_LOG_ERROR, - "Tier fix layout failed :" - "Creation of tier_dict failed"); - ret = -1; - goto out; - } - - /* Check if layout is fixed already */ - ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret != 0) { - tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; - - /*Fill crawl arguments */ - tier_fix_layout_arg->this = this; - tier_fix_layout_arg->fix_layout = fix_layout; - - /* Spawn the fix layout thread so that its done in the - * background */ - ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL, - gf_tier_do_fix_layout, tier_fix_layout_arg, - "tierfixl"); - if (ret) { - gf_log("tier", GF_LOG_ERROR, - "Thread creation failed. " - "Background fix layout for tiering will not " - "work."); - defrag->total_failures++; - goto out; - } - } - ret = 0; -out: - if (tier_dict) - dict_unref(tier_dict); - return ret; } -void -gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) -{ - int ret = -1; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO(this->name, loc, out); - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - - /* Check if background fixlayout is completed. This is not - * multi-process safe i.e there is a possibility that by the time - * we move to remove the xattr there it might have been cleared by some - * other detach process from other node. We ignore the error if such - * a thing happens */ - ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret) { - /* Background fixlayout not complete - nothing to clear*/ - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, - "Unable to retrieve fixlayout xattr." 
- "Assume background fix layout not complete"); - goto out; - } - - ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, - NULL); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, - "Failed removing tier fix layout " - "xattr from %s", - loc->path); - goto out; - } - ret = 0; -out: - if (dict) - dict_unref(dict); -} - -void -gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag) -{ - if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { - pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL); - } -} -/******************Tier background Fix layout functions END********************/ - int dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, loc_t *loc) { dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; uuid_t *uuid_ptr = NULL; int ret = -1; int i = 0; int j = 0; - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg(this->name, GF_LOG_ERROR, -ret, 0, - "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL, + NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } + if (!ret) + goto out; + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, NULL, NULL); if (ret) { @@ -4341,9 +4011,6 @@ dht_file_counter_thread(void *args) struct timespec time_to_wait = { 0, }; - struct timeval now = { - 0, - }; uint64_t tmp_size = 0; if (!args) @@ -4353,9 +4020,8 @@ dht_file_counter_thread(void *args) dht_build_root_loc(defrag->root_inode, &root_loc); while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { - gettimeofday(&now, NULL); - time_to_wait.tv_sec = now.tv_sec + 600; - time_to_wait.tv_nsec = 0; + timespec_now(&time_to_wait); + time_to_wait.tv_sec += 600; pthread_mutex_lock(&defrag->fc_mutex); pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex, @@ -4428,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) goto out; } - ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread, (void *)defrag, "dhtfcnt"); if (ret) { @@ -4485,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, /*Spawn Threads Here*/ while (index < thread_spawn_count) { - ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task, (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff); if (ret != 0) { gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. 
", @@ -4559,7 +4225,6 @@ gf_defrag_start_crawl(void *data) dict_t *migrate_data = NULL; dict_t *status = NULL; glusterfs_ctx_t *ctx = NULL; - dht_methods_t *methods = NULL; call_frame_t *statfs_frame = NULL; xlator_t *old_THIS = NULL; int ret = -1; @@ -4575,7 +4240,6 @@ gf_defrag_start_crawl(void *data) int thread_index = 0; pthread_t *tid = NULL; pthread_t filecnt_thread; - gf_boolean_t is_tier_detach = _gf_false; gf_boolean_t fc_thread_started = _gf_false; this = data; @@ -4594,7 +4258,8 @@ gf_defrag_start_crawl(void *data) if (!defrag) goto exit; - gettimeofday(&defrag->start_time, NULL); + defrag->start_time = gf_time(); + dht_build_root_inode(this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -4728,43 +4393,17 @@ gf_defrag_start_crawl(void *data) } } - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Fix layout for attach tier */ - ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout); - if (ret) { - goto out; - } - - methods = &(conf->methods); - - /* Calling tier_start of tier.c */ - methods->migration_other(this, defrag); - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, - "force"); - if (ret) - goto out; - } - } else { - ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (ret != 2 && - gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } + ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data); + if (ret) { + defrag->total_failures++; + ret = -1; + goto out; + } - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) - is_tier_detach = _gf_true; + if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; } gf_log("DHT", GF_LOG_INFO, "crawling file-system completed"); @@ -4778,19 +4417,6 @@ out: defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; } - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Wait for the tier fixlayout to - * complete if its was started.*/ - gf_tier_wait_fix_lookup(defrag); - } - - if (is_tier_detach && ret == 0) { - /* If it was a detach remove the tier fix-layout - * xattr on root. Ignoring the failure, as nothing has to be - * done, logging is done in gf_tier_clear_fix_layout */ - gf_tier_clear_fix_layout(this, &loc, defrag); - } - gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index); if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && @@ -4889,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) uint64_t total_processed = 0; uint64_t tmp_count = 0; uint64_t time_to_complete = 0; - struct timeval now = { - 0, - }; double elapsed = 0; defrag = conf->defrag; @@ -4899,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) if (!g_totalsize) goto out; - gettimeofday(&now, NULL); - elapsed = now.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* Don't calculate the estimates for the first 10 minutes. 
* It is unlikely to be accurate and estimates are not required @@ -4950,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; char *status = ""; double elapsed = 0; - struct timeval end = { - 0, - }; uint64_t time_to_complete = 0; uint64_t time_left = 0; gf_defrag_info_t *defrag = conf->defrag; @@ -4973,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; - gettimeofday(&end, NULL); - - elapsed = end.tv_sec - defrag->start_time.tv_sec; + elapsed = gf_time() - defrag->start_time; /* The rebalance is still in progress */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { time_to_complete = gf_defrag_get_estimates_based_on_size(conf); if (time_to_complete && (time_to_complete > elapsed)) @@ -4998,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) if (!dict) goto log; - ret = dict_set_uint64(dict, "promoted", promoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); - - ret = dict_set_uint64(dict, "demoted", demoted); - if (ret) - gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); - ret = dict_set_uint64(dict, "files", files); if (ret) gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); @@ -5071,159 +4675,6 @@ out: return 0; } -void -gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state) -{ - pthread_mutex_lock(&tier_conf->pause_mutex); - tier_conf->pause_state = state; - pthread_mutex_unlock(&tier_conf->pause_mutex); -} - -tier_pause_state_t -gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf) -{ - int state; - - pthread_mutex_lock(&tier_conf->pause_mutex); - state = tier_conf->pause_state; - pthread_mutex_unlock(&tier_conf->pause_mutex); - - return state; -} - -tier_pause_state_t -gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf) -{ - int woke = 0; - int state = -1; - - pthread_mutex_lock(&tier_conf->pause_mutex); - - if (tier_conf->pause_state == TIER_RUNNING) - goto out; - - if (tier_conf->pause_state == TIER_PAUSED) - goto out; - - if (tier_conf->promote_in_progress || tier_conf->demote_in_progress) - goto out; - - tier_conf->pause_state = TIER_PAUSED; - - if (tier_conf->pause_synctask) { - synctask_wake(tier_conf->pause_synctask); - tier_conf->pause_synctask = 0; - woke = 1; - } - - gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke); - - gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); -out: - state = tier_conf->pause_state; - - pthread_mutex_unlock(&tier_conf->pause_mutex); - - return state; -} - -void -gf_defrag_pause_tier_timeout(void *data) -{ - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - - this = (xlator_t *)data; - GF_VALIDATE_OR_GOTO("tier", this, out); - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Request pause timer timeout"); - - gf_defrag_check_pause_tier(&defrag->tier_conf); - -out: - return; -} - -int -gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) -{ - int ret = 0; - struct timespec 
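
gf_defrag_get_estimates_based_on_size, now the only estimator left with the tier counters gone, extrapolates linearly: if migration has processed P of T bytes in E seconds, completion takes roughly E*T/P seconds, and the remainder is the time left. The surrounding code also refuses to estimate during the first ten minutes, when the measured rate is too noisy to project. The arithmetic as a sketch:

    #include <stdint.h>
    #include <time.h>

    /* Linear extrapolation from bytes processed so far; returns 0 when
     * no estimate is possible yet (warm-up period or nothing done). */
    static uint64_t
    seconds_left(uint64_t total, uint64_t processed, time_t start_time)
    {
        uint64_t elapsed = (uint64_t)(time(NULL) - start_time);
        uint64_t to_complete;

        if (!processed || elapsed < 600)
            return 0;

        to_complete = elapsed * total / processed;
        return to_complete > elapsed ? to_complete - elapsed : 0;
    }
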
delta = { - 0, - }; - int delay = 2; - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - goto out; - - /* - * Set flag requesting to pause tiering. Wait 'delay' seconds for - * tiering to actually stop as indicated by the pause state - * before returning success or failure. - */ - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); - - /* - * If migration is not underway, can pause immediately. - */ - gf_defrag_check_pause_tier(&defrag->tier_conf); - if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Request pause tier"); - - defrag->tier_conf.pause_synctask = synctask_get(); - delta.tv_sec = delay; - delta.tv_nsec = 0; - defrag->tier_conf.pause_timer = gf_timer_call_after( - this->ctx, delta, gf_defrag_pause_tier_timeout, this); - - synctask_yield(defrag->tier_conf.pause_synctask); - - if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - - ret = -1; -out: - - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, - "Pause tiering ret=%d", ret); - - return ret; -} - -int -gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag) -{ - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME, - "Pause end. Resume tiering"); - - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - - gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); - - return 0; -} - -int -gf_defrag_start_detach_tier(gf_defrag_info_t *defrag) -{ - defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; - - return 0; -} - int gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index e7d8028f7f8..3e24065227c 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -8,9 +8,7 @@ cases as published by the Free Software Foundation. 
*/ -#include "dht-common.h" #include "dht-lock.h" -#include <glusterfs/glusterfs-acl.h> #define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \ do { \ @@ -35,7 +33,7 @@ } \ } while (0) -int +static int dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, gf_boolean_t newdir, dht_selfheal_layout_t healer, dht_need_heal_t should_heal); @@ -146,8 +144,8 @@ dht_refresh_layout_done(call_frame_t *frame) ret = dht_layout_sort(refreshed); if (ret == -1) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED, - "sorting the layout failed"); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + DHT_MSG_LAYOUT_SORT_FAILED, NULL); goto err; } @@ -203,10 +201,9 @@ dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) { gf_uuid_unparse(local->loc.gfid, gfid); local->op_errno = op_errno; - gf_msg(this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_FILE_LOOKUP_FAILED, - "lookup of %s on %s returned error, gfid: %s", - local->loc.path, prev->name, gfid); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path, + "name=%s", prev->name, "gfid=%s", gfid, NULL); goto unlock; } @@ -267,9 +264,8 @@ dht_refresh_layout(call_frame_t *frame) conf->subvolume_cnt); if (!local->selfheal.refreshed_layout) { gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation for layout failed, path:%s gfid:%s", - local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED, + "path=%s", local->loc.path, "gfid=%s", gfid, NULL); goto out; } @@ -281,9 +277,8 @@ dht_refresh_layout(call_frame_t *frame) gf_uuid_unparse(local->loc.gfid, gfid); local->xattr_req = dict_new(); if (local->xattr_req == NULL) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "dict mem allocation failed, path:%s gfid:%s", - local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "path=%s", local->loc.path, "gfid=%s", gfid, NULL); goto out; } } @@ -291,9 +286,9 @@ dht_refresh_layout(call_frame_t *frame) if (dict_get(local->xattr_req, conf->xattr_name) == 0) { ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:key = %s", - local->loc.path, conf->xattr_name); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", local->loc.path, "key=%s", conf->xattr_name, + NULL); } for (i = 0; i < call_cnt; i++) { @@ -526,7 +521,7 @@ out: return fixit; } -int +static int dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, gf_boolean_t newdir, dht_selfheal_layout_t healer, dht_need_heal_t should_heal) @@ -558,10 +553,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char); if (lk_array == NULL) { gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation failed for " - "lk_array, gfid:%s path: %s", - gfid, local->loc.path); + gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED, + "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL); goto err; } @@ -571,10 +564,9 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR); if (lk_array[i] == NULL) { gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, 
DHT_MSG_NO_MEMORY, - "mem allocation " - "failed for lk_array, gfid:%s path:%s", - gfid, local->loc.path); + gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid, + "path=%s", local->loc.path, NULL); goto err; } } @@ -583,10 +575,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char); if (lk_array == NULL) { gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation failed for " - "lk_array, gfid:%s path:%s", - gfid, local->loc.path); + gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED, + "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL); goto err; } @@ -595,10 +585,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout, NULL, FAIL_ON_ANY_ERROR); if (lk_array[0] == NULL) { gf_uuid_unparse(local->stbuf.ia_gfid, gfid); - gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation failed for " - "lk_array, gfid:%s path:%s", - gfid, local->loc.path); + gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED, + "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL); goto err; } } @@ -624,7 +612,7 @@ err: return -1; } -int +static int dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -646,10 +634,9 @@ dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, err = 0; } else { gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg(this->name, GF_LOG_ERROR, op_errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "layout setxattr failed on %s, path:%s gfid:%s", subvol->name, - local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name, + "path=%s", local->loc.path, "gfid=%s", gfid, NULL); err = op_errno; } @@ -696,7 +683,7 @@ dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data) return ret; } -int +static int dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, int i, xlator_t *req_subvol) @@ -738,19 +725,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc, ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", - loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, + "gfid=%s", gfid, NULL); goto err; } ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value: key = %s," - " gfid = %s", - loc->path, DHT_IATT_IN_XDATA_KEY, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY, + "gfid=%s", gfid, NULL); goto err; } @@ -758,21 +743,21 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc, ret = dht_disk_layout_extract(this, layout, i, &disk_layout); if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - " %s: (subvol %s) Failed to extract disk layout," - " gfid = %s", - loc->path, subvol->name, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, + 
"extract-disk-layout-failed, path=%s", loc->path, "subvol=%s", + subvol->name, "gfid=%s", gfid, NULL); goto err; } ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4); if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - "%s: (subvol %s) Failed to set xattr dictionary," - " gfid = %s", - loc->path, subvol->name, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path, + "subvol=%s", subvol->name, + "set-xattr-dictionary-failed" + "gfid=%s", + gfid, NULL); goto err; } disk_layout = NULL; @@ -788,20 +773,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc, if (data) { ret = dict_add(xattr, QUOTA_LIMIT_KEY, data); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = %s", - loc->path, QUOTA_LIMIT_KEY); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL); } } data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY); if (data) { ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = %s", - loc->path, QUOTA_LIMIT_OBJECTS_KEY); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY, + NULL); } } } @@ -830,7 +812,7 @@ err: return 0; } -int +static int dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { dht_local_t *local = NULL; @@ -879,7 +861,7 @@ out: return 0; } -int +static int dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { dht_local_t *local = NULL; @@ -939,9 +921,8 @@ dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) dummy = dht_layout_new(this, 1); if (!dummy) { gf_uuid_unparse(loc->gfid, gfid); - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "failed to allocate dummy layout, path:%s gfid:%s", loc->path, - gfid); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED, + "path=%s", loc->path, "gfid=%s", gfid, NULL); goto out; } for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) { @@ -957,38 +938,6 @@ out: return 0; } -gf_boolean_t -dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator) -{ - int i = 0; - gf_boolean_t ret = _gf_false; - - for (i = 0; i < layout->cnt; i++) { - if (!strcmp(layout->list[i].xlator->name, xlator->name)) { - ret = _gf_true; - break; - } - } - - return ret; -} - -int -dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator) -{ - int i = -1; - int j = 0; - - for (j = 0; j < layout->cnt; j++) { - if (!strcmp(layout->list[j].xlator->name, xlator->name)) { - i = j; - break; - } - } - - return i; -} - int dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *statpre, @@ -1079,7 +1028,7 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, return 0; } -int +static int dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, @@ -1109,11 +1058,10 @@ dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret) { gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg(this->name, - ((op_errno == EEXIST) ? 
GF_LOG_DEBUG : GF_LOG_WARNING), op_errno, - DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed: path = %s, gfid = %s", - local->loc.path, gfid); + gf_smsg(this->name, + ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), + op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s", + local->loc.path, "gfid=%s", gfid, NULL); goto out; } dht_iatt_merge(this, &local->preparent, preparent); @@ -1132,89 +1080,7 @@ out: return 0; } -void -dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict) -{ - data_t *acl_default = NULL; - data_t *acl_access = NULL; - xlator_t *this = NULL; - int ret = -1; - - GF_ASSERT(xattr); - GF_ASSERT(dict); - - this = THIS; - GF_ASSERT(this); - - acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR); - - if (!acl_default) { - gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present"); - goto cont; - } - ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - POSIX_ACL_DEFAULT_XATTR); -cont: - acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR); - if (!acl_access) { - gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present"); - goto out; - } - ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - POSIX_ACL_ACCESS_XATTR); - -out: - return; -} - -void -dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst) -{ - data_t *quota_limit_key = NULL; - data_t *quota_limit_obj_key = NULL; - xlator_t *this = NULL; - int ret = -1; - - GF_ASSERT(src); - GF_ASSERT(dst); - - this = THIS; - GF_ASSERT(this); - - quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY); - if (!quota_limit_key) { - gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present"); - goto cont; - } - ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY); - -cont: - quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY); - if (!quota_limit_obj_key) { - gf_msg_debug(this->name, 0, - "QUOTA_LIMIT_OBJECTS_KEY xattr not present"); - goto out; - } - ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value.key = %s", - QUOTA_LIMIT_OBJECTS_KEY); - -out: - return; -} - -int +static int dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this) { dht_local_t *local = NULL; @@ -1238,10 +1104,8 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this) ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true); if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "%s: Failed to set dictionary value:" - " key = gfid-req", - loc->path); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + "path=%s", loc->path, "key=gfid-req", NULL); } else if (local->params) { /* Send the dictionary from higher layers directly */ @@ -1253,18 +1117,15 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this) dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL); if (!dict) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "dict is NULL, need to make sure gfids are same"); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL); dict = dict_new(); if (!dict) return -1; } ret = dict_set_flag(dict, 
GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value for" - " key = %s at path: %s", - GF_INTERNAL_CTX_KEY, loc->path); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s", + GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL); /* We can still continue. As heal can still happen * unless quota limits have reached for the dir. */ @@ -1296,7 +1157,7 @@ err: return 0; } -int +static int dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, @@ -1390,7 +1251,7 @@ err: return 0; } -int +static int dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -1410,19 +1271,14 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, local->call_cnt = conf->subvolume_cnt; if (op_ret < 0) { - /* We get this error when the directory entry was not created - * on a newky attached tier subvol. Hence proceed and do mkdir - * on the tier subvol. - */ if (op_errno == EINVAL) { local->call_cnt = 1; dht_selfheal_dir_mkdir_lookup_done(frame, this); return 0; } - gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR, - "acquiring entrylk after inodelk failed for %s", - local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR, + "path=%s", local->loc.path, NULL); local->op_errno = op_errno; goto err; @@ -1436,10 +1292,8 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie, ret = dict_set_int32(local->xattr_req, "list-xattr", 1); if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary key list-xattr value " - " for path %s ", - local->loc.path); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s", + local->loc.path, NULL); for (i = 0; i < conf->subvolume_cnt; i++) { if (mds_subvol && conf->subvolumes[i] == mds_subvol) { @@ -1462,18 +1316,21 @@ err: return 0; } -int +static int dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, int force) { int missing_dirs = 0; int i = 0; + int op_errno = 0; int ret = -1; dht_local_t *local = NULL; xlator_t *this = NULL; + dht_conf_t *conf = NULL; local = frame->local; this = frame->this; + conf = this->private; local->selfheal.force_mkdir = force; local->selfheal.hole_cnt = 0; @@ -1490,13 +1347,12 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, if (!__is_root_gfid(local->stbuf.ia_gfid)) { if (local->need_xattr_heal) { local->need_xattr_heal = 0; - ret = dht_dir_xattr_heal(this, local); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "%s:xattr heal failed for " - "directory (gfid = %s)", - local->loc.path, local->gfid); + ret = dht_dir_xattr_heal(this, local, &op_errno); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", + local->loc.path, "gfid=%s", local->gfid, NULL); + } } else { if (!gf_uuid_is_null(local->gfid)) gf_uuid_copy(loc->gfid, local->gfid); @@ -1505,28 +1361,53 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, if (!ret) return 0; - gf_msg(this->name, GF_LOG_INFO, 0, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "%s: Failed to set mds xattr " - "for directory (gfid = %s)", - local->loc.path, local->gfid); + gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED, + "path=%s", 
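
The new block in dht_selfheal_dir_mkdir below changes how the heal picks a metadata subvolume (MDS) when a directory's MDS xattr is absent, e.g. after a graph switch that added subvolumes: with more than one subvolume, the hashed subvolume is promoted to MDS and cached in the inode context so later fops don't trip the MDS check. Condensed, with the helper names as hypothetical stand-ins for the dict and inode-ctx plumbing:

    /* Sketch only; mds_xattr_present() and ctx_set_mdsvol() are
     * hypothetical stand-ins. */
    if (!mds_xattr_present(xattr) && subvolume_cnt > 1) {
        if (!hashed_subvol)
            hashed_subvol = subvol_get_hashed(this, loc);
        if (!hashed_subvol)
            return -EINVAL;           /* nowhere to hash to */

        /* Remember the choice on the inode so subsequent fops see a
         * consistent MDS. */
        if (ctx_set_mdsvol(inode, this, hashed_subvol))
            return -EINVAL;
    }
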
local->loc.path, "gfid=%s", local->gfid, + NULL); } } dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout); return 0; } - if (local->hashed_subvol == NULL) - local->hashed_subvol = dht_subvol_get_hashed(this, loc); + /* The MDS xattr is populated only when DHT has more than one + subvol. In case of a graph switch that adds more dht subvols, treat + the hashed subvol as the MDS to avoid MDS-check failures while + running fops on the directory. + */ + if (!dict_get(local->xattr, conf->mds_xattr_key) && + (conf->subvolume_cnt > 1)) { + if (local->hashed_subvol == NULL) { + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", + loc->pargfid, "name=%s", loc->name, "path=%s", + loc->path, NULL); + goto err; + } + } + ret = dht_inode_ctx_mdsvol_set(local->inode, this, + local->hashed_subvol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "Failed to set hashed subvol for %s on inode (vol is %s)", + local->loc.path, + local->hashed_subvol ? local->hashed_subvol->name : "NULL"); + goto err; + } + } if (local->hashed_subvol == NULL) { - local->op_errno = EINVAL; - gf_msg(this->name, GF_LOG_WARNING, local->op_errno, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "(%s/%s) (path: %s): " - "hashed subvolume not found", - loc->pargfid, loc->name, loc->path); - goto err; + local->hashed_subvol = dht_subvol_get_hashed(this, loc); + if (local->hashed_subvol == NULL) { + local->op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_WARNING, local->op_errno, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid, + "name=%s", loc->name, "path=%s", loc->path, NULL); + goto err; + } } local->current = &local->lock[0]; @@ -1542,7 +1423,7 @@ err: return -1; } -int +static int dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc, dht_layout_t *layout) { @@ -1651,8 +1532,6 @@ dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc, dht_layout_t *new_layout); void -dht_layout_entry_swap(dht_layout_t *layout, int i, int j); -void dht_layout_range_swap(dht_layout_t *layout, int i, int j); /* @@ -1661,7 +1540,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j); */ #define OV_ENTRY(x, y) table[x * new->cnt + y] -void +static void dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc, dht_layout_t *new, dht_layout_t *old) { @@ -1738,7 +1617,7 @@ dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc, } } -dht_layout_t * +static dht_layout_t * dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { @@ -1763,9 +1642,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, new_layout = dht_layout_new(this, priv->subvolume_cnt); if (!new_layout) { gf_uuid_unparse(loc->gfid, gfid); - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "mem allocation failed for new_layout, path:%s gfid:%s", - loc->path, gfid); + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED, + "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL); goto done; } @@ -1775,10 +1653,9 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, if (subvol_down) { gf_uuid_unparse(loc->gfid, gfid); - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED, - "Layout fix failed: %u subvolume(s) are down" - ". Skipping fix layout. 
path:%s gfid:%s", - subvol_down, loc->path, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s", + loc->path, "gfid=%s", gfid, NULL); GF_FREE(new_layout); return NULL; } @@ -1796,10 +1673,10 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, if (priv->du_stats) { for (i = 0; i < priv->subvolume_cnt; ++i) { - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO, - "subvolume %d (%s): %u chunks, path:%s", i, - priv->subvolumes[i]->name, priv->du_stats[i].chunks, - loc->path); + gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO, + "index=%d", i, "name=%s", priv->subvolumes[i]->name, + "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path, + NULL); /* Maximize overlap if the bricks are all the same * size. @@ -1811,8 +1688,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc, } } } else { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS, - "no du stats ?!?"); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS, + NULL); } /* First give it a layout as though it is a new directory. This @@ -1843,7 +1720,7 @@ done: * Having to call this 2x for each entry in the layout is pretty horrible, but * that's what all of this layout-sorting nonsense gets us. */ -uint32_t +static uint32_t dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child) { dht_conf_t *priv = parent->private; @@ -1961,7 +1838,7 @@ done: return; } -int +static int dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout) { dht_local_t *local = NULL; @@ -2020,9 +1897,8 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, linked_inode = inode_link(loc->inode, loc->parent, loc->name, &local->stbuf); if (!linked_inode) { - gf_msg(frame->this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_FAILED, - "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid); + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED, + "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL); ret = -1; goto out; } @@ -2094,9 +1970,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, local->selfheal.dir_cbk = dir_cbk; local->selfheal.layout = dht_layout_ref(this, layout); - if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { - /*Use the one in the mds_stbuf*/ - local->stbuf = local->mds_stbuf; + if (local->need_attrheal) { + if (__is_root_gfid(local->stbuf.ia_gfid)) { + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; + + local->stbuf.ia_ctime = local->prebuf.ia_ctime; + local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec; + local->stbuf.ia_prot = local->prebuf.ia_prot; + + } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) { + local->stbuf = local->mds_stbuf; + } } if (!__is_root_gfid(local->stbuf.ia_gfid)) { @@ -2106,9 +1991,9 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, linked_inode = inode_link(loc->inode, loc->parent, loc->name, &local->stbuf); if (!linked_inode) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, - "linking inode failed (%s/%s) => %s", pgfid, loc->name, - gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED, + "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, + NULL); ret = 0; goto sorry_no_fix; } @@ -2134,19 +2019,17 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, misc = 
local->selfheal.misc; if (down) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, - "%s: Directory selfheal failed: %d subvolumes down." - "Not fixing. gfid = %s", - loc->path, down, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED, + "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing", + "gfid=%s", gfid, NULL); ret = 0; goto sorry_no_fix; } if (misc) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, - "%s: Directory selfheal failed : %d subvolumes " - "have unrecoverable errors. gfid = %s", - loc->path, misc, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED, + "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors", + "gfid=%s", gfid, NULL); ret = 0; goto sorry_no_fix; @@ -2232,29 +2115,28 @@ dht_dir_heal_xattrs(void *data) gf_uuid_unparse(local->loc.gfid, gfid); if (!mds_subvol) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "No mds subvol for %s gfid = %s", local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s", + local->loc.path, "gfid=%s", gfid, NULL); goto out; } if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) || gf_uuid_is_null(local->loc.gfid)) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "No gfid present so skip heal for path %s gfid = %s", - local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT, + "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL); goto out; } internal_xattr = dict_new(); if (!internal_xattr) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED, + "dictionary", NULL); goto out; } xdata = dict_new(); if (!xdata) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED, + "dictionary", NULL); goto out; } @@ -2262,18 +2144,17 @@ dht_dir_heal_xattrs(void *data) user_xattr = dict_new(); if (!user_xattr) { - gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, - "dictionary creation failed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED, + "dictionary", NULL); goto out; } ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL, NULL); if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "failed to list xattrs for " - "%s: on %s ", - local->loc.path, local->mds_subvol->name); + gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED, + "path=%s", local->loc.path, "name=%s", local->mds_subvol->name, + NULL); } if (!mds_xattr) @@ -2288,10 +2169,9 @@ dht_dir_heal_xattrs(void *data) dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) { ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s," - " path = %s", - GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path); + gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s", + local->loc.path, NULL); goto out; } } @@ -2303,16 +2183,25 @@ dht_dir_heal_xattrs(void *data) if (subvol == mds_subvol) continue; if (uret || uflag) { + /* Custom xattr heal is required - let posix handle it */ + ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, + 
"path=%s", local->loc.path, "key=%s", + "sync_backend_xattrs", NULL); + goto out; + } + ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata, NULL); if (ret) { xattr_hashed = 1; - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Directory xattr heal failed. Failed to set" - "user xattr on path %s on " - "subvol %s, gfid = %s ", - local->loc.path, subvol->name, gfid); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, + "set-user-xattr-failed path=%s", local->loc.path, + "subvol=%s", subvol->name, "gfid=%s", gfid, NULL); + } else { + dict_del(xdata, "sync_backend_xattrs"); } } } @@ -2321,21 +2210,17 @@ dht_dir_heal_xattrs(void *data) ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero, 1); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value:key = %s for " - "path %s", - conf->mds_xattr_key, local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED, + "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path, + NULL); goto out; } ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL, NULL); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Failed to reset internal xattr " - "on path %s on subvol %s" - "gfid = %s ", - local->loc.path, mds_subvol->name, gfid); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path, + "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL); } } @@ -2386,8 +2271,8 @@ dht_dir_attr_heal(void *data) call_cnt = conf->subvolume_cnt; if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED, - "No mds subvol for %s gfid = %s", local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s", + local->loc.path, "gfid=%s", gfid, NULL); goto out; } @@ -2395,11 +2280,9 @@ dht_dir_attr_heal(void *data) for (i = 0; i < conf->subvolume_cnt; i++) { if (conf->subvolumes[i] == mds_subvol) { if (!conf->subvolume_status[i]) { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_DOWN, - "mds subvol is down for path " - " %s gfid is %s Unable to set xattr ", - local->loc.path, gfid); + gf_smsg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s", + local->loc.path, "gfid=%s", gfid, NULL); goto out; } } @@ -2425,10 +2308,9 @@ dht_dir_attr_heal(void *data) if (ret) { gf_uuid_unparse(local->loc.gfid, gfid); - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED, - "Directory attr heal failed. 
Failed to set" - " uid/gid on path %s on subvol %s, gfid = %s ", - local->loc.path, subvol->name, gfid); + gf_smsg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path, + "subvol=%s", subvol->name, "gfid=%s", gfid, NULL); } } out: @@ -2443,7 +2325,7 @@ dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data) } /* EXIT: dht_update_commit_hash_for_layout */ -int +static int dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -2463,7 +2345,7 @@ dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie, return 0; } -int +static int dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this) { dht_local_t *local = NULL; @@ -2481,11 +2363,8 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this) local->op_ret = -1; } - gf_msg(this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Winding unlock failed: stale locks left on brick" - " %s", - local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED, + "path=%s", local->loc.path, NULL); dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL); } @@ -2493,7 +2372,7 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this) return 0; } -int +static int dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) @@ -2520,7 +2399,7 @@ dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie, return 0; } -int +static int dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -2548,11 +2427,8 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, if (!xattr) { local->op_errno = errno; - gf_msg(this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: Allocation failed", - local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED, + "allocation-failed path=%s", local->loc.path, NULL); goto err; } @@ -2563,11 +2439,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, if (ret < 0) { local->op_errno = ENOENT; - gf_msg(this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: (subvol %s) Failed to find disk layout", - local->loc.path, conf->local_subvols[i]->name); + gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED, + "path=%s", local->loc.path, "subvol=%s", + conf->local_subvols[i]->name, "find-disk-layout-failed", + NULL); goto err; } @@ -2581,12 +2456,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, if (ret == -1) { local->op_errno = errno; - gf_msg(this->name, GF_LOG_WARNING, errno, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: (subvol %s) Failed to extract disk" - " layout", - local->loc.path, conf->local_subvols[i]->name); + gf_smsg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path, + "subvol=%s", conf->local_subvols[i]->name, + "extract-disk-layout-failed", NULL); goto err; } @@ -2595,11 +2468,9 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, if (!xattr[i]) { local->op_errno = errno; - gf_msg(this->name, GF_LOG_WARNING, errno, - 
DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory commit hash update failed:" - " %s: Allocation failed", - local->loc.path); + gf_smsg(this->name, GF_LOG_WARNING, errno, + DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed", + local->loc.path, NULL); goto err; } @@ -2608,12 +2479,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie, if (ret != 0) { local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, 0, - DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, - "Directory self heal xattr failed:" - "%s: (subvol %s) Failed to set xattr" - " dictionary,", - local->loc.path, conf->local_subvols[i]->name); + gf_smsg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", + local->loc.path, "subvol=%s", conf->local_subvols[i]->name, + "set-xattr-failed", NULL); goto err; } diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 0b73121b15a..bb72b0ffbb5 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -17,23 +17,6 @@ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif -#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \ - { \ - throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \ - pthread_mutex_lock(&conf->defrag->dfq_mutex); \ - \ - if (!strcasecmp(conf->dthrottle, "lazy")) \ - conf->defrag->recon_thread_count = 1; \ - \ - else if (!strcasecmp(conf->dthrottle, "normal")) \ - conf->defrag->recon_thread_count = (throttle_count / 2); \ - \ - else if (!strcasecmp(conf->dthrottle, "aggressive")) \ - conf->defrag->recon_thread_count = throttle_count; \ - \ - pthread_mutex_unlock(&conf->defrag->dfq_mutex); \ - } - /* TODO: - use volumename in xattr instead of "dht" - use NS locks @@ -41,9 +24,7 @@ - complete linkfile selfheal */ -extern dht_methods_t dht_methods; - -void +static void dht_layout_dump(dht_layout_t *layout, const char *prefix) { char key[GF_DUMP_MAX_BUF_LEN]; @@ -51,8 +32,6 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix) if (!layout) goto out; - if (!prefix) - goto out; gf_proc_dump_build_key(key, prefix, "cnt"); gf_proc_dump_write(key, "%d", layout->cnt); @@ -161,9 +140,9 @@ dht_priv_dump(xlator_t *this) } } - if (conf->last_stat_fetch.tv_sec) + if (conf->last_stat_fetch) gf_proc_dump_write("last_stat_fetch", "%s", - ctime(&conf->last_stat_fetch.tv_sec)); + ctime(&conf->last_stat_fetch)); UNLOCK(&conf->subvolume_lock); @@ -263,7 +242,7 @@ out: return ret; } -int +static int dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf, const char *bricks) { @@ -309,14 +288,10 @@ out: return ret; } -int +static void dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf) { int i = 0; - int ret = -1; - - if (!conf) - goto out; for (i = 0; i < conf->subvolume_cnt; i++) { if (conf->decommissioned_bricks[i]) { @@ -324,13 +299,9 @@ dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf) conf->decommission_subvols_cnt--; } } - - ret = 0; -out: - - return ret; } -void + +static void dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re, gf_boolean_t *re_valid, dht_conf_t *conf) { @@ -387,7 +358,7 @@ out: return ret; } -int +static int dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str) { int rebal_thread_count = 0; @@ -526,9 +497,7 @@ dht_reconfigure(xlator_t *this, dict_t *options) if (ret == -1) goto out; } else { - ret = dht_decommissioned_remove(this, conf); - if (ret == -1) - goto out; + dht_decommissioned_remove(this, conf); } dht_init_regex(this, options, "rsync-hash-regex", 
&conf->rsync_regex, @@ -568,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag, pattern_str = strtok_r(data, ",", &tmp_str); while (pattern_str) { dup_str = gf_strdup(pattern_str); + if (!dup_str) + goto out; pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1); if (!pattern_list) { goto out; @@ -614,7 +585,7 @@ out: return ret; } -int +static int dht_init_methods(xlator_t *this) { int ret = -1; @@ -627,7 +598,6 @@ dht_init_methods(xlator_t *this) methods = &(conf->methods); methods->migration_get_dst_subvol = dht_migration_get_dst_subvol; - methods->migration_needed = dht_migration_needed; methods->migration_other = NULL; methods->layout_search = dht_layout_search; @@ -1076,84 +1046,6 @@ struct volume_options dht_options[] = { /* NUFA option */ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR}, - /* tier options */ - { - .key = {"tier-pause"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - - { - .key = {"tier-promote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "120", - }, - - { - .key = {"tier-demote-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "3600", - }, - - { - .key = {"write-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - - { - .key = {"read-freq-threshold"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"watermark-hi"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "90", - }, - { - .key = {"watermark-low"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "75", - }, - { - .key = {"tier-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "test", - }, - { - .key = {"tier-compact"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - }, - {.key = {"tier-hot-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on hot tier in system"}, - {.key = {"tier-cold-compact-frequency"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "604800", - .description = "Frequency to compact DBs on cold tier in system"}, - { - .key = {"tier-max-mb"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4000", - }, - { - .key = {"tier-max-promote-file-size"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - }, - { - .key = {"tier-max-files"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "10000", - }, - { - .key = {"tier-query-limit"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "100", - }, /* switch option */ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY}, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 59313639c45..3648a564840 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -595,7 +595,6 @@ nufa_init(xlator_t *this) dht_methods_t dht_methods = { .migration_get_dst_subvol = dht_migration_get_dst_subvol, - .migration_needed = dht_migration_needed, .layout_search = dht_layout_search, }; diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c index dedcfb076a2..703a30e2485 100644 --- a/xlators/cluster/ec/src/ec-combine.c +++ b/xlators/cluster/ec/src/ec-combine.c @@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, "links: %u-%u, uid: %u-%u, gid: %u-%u, " "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64 ", " - "mode: %o-%o)", + "mode: %o-%o), %s", dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink, src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid, src[i].ia_gid, 
dst[i].ia_rdev, src[i].ia_rdev, dst[i].ia_size, src[i].ia_size, st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type), - st_mode_from_ia(src[i].ia_prot, dst[i].ia_type)); + st_mode_from_ia(src[i].ia_prot, dst[i].ia_type), + ec_msg_str(fop)); return 0; } @@ -342,9 +343,8 @@ out: } static int32_t -ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which, - char *key, char *new_key, const char *def, - gf_boolean_t global, ...) +ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key, + const char *def, gf_boolean_t global, const char *fmt, ...) { ec_t *ec = cbk->fop->xl->private; data_t *data[ec->nodes]; @@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which, ec_dict_list(data, cbk, which, key, global); - va_start(args, global); + va_start(args, fmt); err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args); va_end(args); @@ -729,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg) if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) || (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) { - return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key, - NULL, NULL, _gf_false, + return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL, + _gf_false, _gf_false, "(<EC:%s> { })", data->cbk->fop->xl->name); } if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) { - return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL, - NULL, _gf_false); + return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL, + _gf_false, "{\n}"); } if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) { @@ -766,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg) if (XATTR_IS_NODE_UUID(key)) { if (data->cbk->fop->int32) { /* List of node uuid is requested */ - return ec_dict_data_concat("{ }", data->cbk, data->which, key, + return ec_dict_data_concat(data->cbk, data->which, key, GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR, - _gf_true); + _gf_true, "{ }"); } else { return ec_dict_data_uuid(data->cbk, data->which, key); } diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 9045a336c56..b955efd8c2d 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) int32_t ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, - uintptr_t bad, dict_t *xdata) + uintptr_t bad, uint32_t pending, dict_t *xdata) { if (op_ret < 0) { gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, @@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop) } } - gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, - "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s, %s)", - gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, - ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), - ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), - ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), - ec_bin(str4, sizeof(str4), fop->good, ec->nodes), - ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), - ec->nodes), - ec_msg_str(fop)); + gf_msg( + fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " + "remaining=%s, good=%s, bad=%s," + "(Least significant bit represents first 
client/brick of subvol), %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), + ec->nodes), + ec_msg_str(fop)); if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, @@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop) loc_t *loc2 = NULL; char gfid1[64] = {0}; char gfid2[64] = {0}; + ec_fop_data_t *parent = fop->parent; if (fop->errstr) return fop->errstr; - if (!fop->use_fd) { loc1 = &fop->loc[0]; loc2 = &fop->loc[1]; @@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop) if (fop->id == GF_FOP_RENAME) { gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' and '%s' with gfids " - "%s and %s respectively", + "%s and %s respectively. Parent FOP: %s", ec_fop_name(fop->id), loc1->path, loc2->path, uuid_utoa_r(loc1->gfid, gfid1), - uuid_utoa_r(loc2->gfid, gfid2)); + uuid_utoa_r(loc2->gfid, gfid2), + parent ? ec_fop_name(parent->id) : "No Parent"); } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s", - ec_fop_name(fop->id), loc1->path, - uuid_utoa_r(loc1->gfid, gfid1)); + gf_asprintf( + &fop->errstr, + "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), loc1->path, + uuid_utoa_r(loc1->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } } else { - gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s", - ec_fop_name(fop->id), - uuid_utoa_r(fop->fd->inode->gfid, gfid1)); + gf_asprintf( + &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s", + ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1), + parent ? ec_fop_name(parent->id) : "No Parent"); } return fop->errstr; } +static void +ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need, + int32_t loglevel) +{ + ec_t *ec = fop->xl->private; + char str1[32], str2[32], str3[32]; + + gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT, + "Insufficient available children for this request: " + "Have : %d, Need : %u : Child UP : %s " + "Mask: %s, Healing : %s : %s ", + have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->healing, ec->nodes), + ec_msg_str(fop)); +} + static int32_t ec_child_select(ec_fop_data_t *fop) { @@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop) ec_trace("SELECT", fop, ""); if ((num < fop->minimum) && (num < ec->fragments)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). %s", - num, fop->minimum, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR); return 0; } @@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop) (fop->locks[0].update[EC_DATA_TXN] || fop->locks[0].update[EC_METADATA_TXN])) { if (ec->quorum_count && (num < ec->quorum_count)) { - gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, - "Insufficient available children " - "for this request (have %d, need " - "%d). 
%s", - num, ec->quorum_count, ec_msg_str(fop)); + ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR); return 0; } } @@ -2240,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, if (op_ret < 0) { gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, - "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id)); + "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop)); } else { ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock); } diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index ef6b06fa4dd..f71dcfac293 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -386,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) /* Return error if opendir has not been successfully called on * any subvolume. */ ctx = ec_fd_get(fop->fd, fop->xl); - if ((ctx == NULL) || (ctx->open == 0)) { - fop->error = EINVAL; + if (ctx == NULL) { + fop->error = ENOMEM; + } else if (ctx->open == 0) { + fop->error = EBADFD; + } + if (fop->error) { + gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error, + EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s", + ec_msg_str(fop)); return EC_STATE_REPORT; } diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 81f6add5bb0..7d991f04aac 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -70,6 +70,7 @@ struct ec_name_data { char *name; inode_t *parent; default_args_cbk_t *replies; + uint32_t heal_pending; }; static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; @@ -994,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, ret = -ENOTCONN; goto out; } + out: if (xattr) dict_unref(xattr); @@ -1172,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, dict_t *xdata = NULL; char *linkname = NULL; ec_config_t config; + /* There should be just one gfid key */ EC_REPLIES_ALLOC(replies, ec->nodes); if (gfid_db->count != 1) { @@ -1416,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent, participants); + if (ret >= 0) { + /* If ec_create_name() succeeded we return 1 to indicate that a new + * file has been created and it will need to be healed. 
*/ + ret = 1; + } out: cluster_replies_wipe(replies, ec->nodes); loc_wipe(&loc); @@ -1493,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, name_on); - if (ret < 0) + if (ret < 0) { memset(name_on, 0, ec->nodes); + } else { + name_data->heal_pending += ret; + } for (i = 0; i < ec->nodes; i++) if (name_data->participants[i] && !name_on[i]) name_data->failed_on[i] = 1; + return 0; } int ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *participants) + unsigned char *participants, uint32_t *pending) { int i = 0; int j = 0; @@ -1517,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, name_data.frame = frame; name_data.participants = participants; name_data.failed_on = alloca0(ec->nodes); - ; + name_data.heal_pending = 0; for (i = 0; i < ec->nodes; i++) { if (!participants[i]) @@ -1536,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, break; } } + *pending += name_data.heal_pending; + loc_wipe(&loc); return ret; } @@ -1543,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, int __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, unsigned char *heal_on, unsigned char *sources, - unsigned char *healed_sinks) + unsigned char *healed_sinks, uint32_t *pending) { unsigned char *locked_on = NULL; unsigned char *output = NULL; @@ -1588,7 +1602,7 @@ unlock: if (sources[i] || healed_sinks[i]) participants[i] = 1; } - ret = ec_heal_names(frame, ec, inode, participants); + ret = ec_heal_names(frame, ec, inode, participants, pending); if (EC_COUNT(participants, ec->nodes) <= ec->fragments) goto out; @@ -1609,7 +1623,8 @@ out: int ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, - unsigned char *sources, unsigned char *healed_sinks) + unsigned char *sources, unsigned char *healed_sinks, + uint32_t *pending) { unsigned char *locked_on = NULL; unsigned char *up_subvols = NULL; @@ -1640,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, goto unlock; } ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, - healed_sinks); + healed_sinks, pending); } unlock: cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, @@ -1961,14 +1976,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0, (heal->good | heal->bad), heal->good, heal->bad, - NULL); + 0, NULL); } return EC_STATE_END; case -EC_STATE_REPORT: if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1, - fop->error, 0, 0, 0, NULL); + fop->error, 0, 0, 0, 0, NULL); } return EC_STATE_END; @@ -2005,14 +2020,15 @@ out: if (fop != NULL) { ec_manager(fop, error); } else { - func(frame, heal, this, -1, error, 0, 0, 0, NULL); + func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL); } } int32_t ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) { ec_heal_t *heal = cookie; @@ -2481,6 +2497,58 @@ out: return ret; } +int +ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) +{ + int i = 0; + int ret = 0; + dict_t **xattr = NULL; + loc_t loc = {0}; + uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; + unsigned char *on = NULL; + default_args_cbk_t *replies = NULL; + dict_t *dict = NULL; + + /* 
Allocate the required memory */ + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + on = alloca0(ec->nodes); + EC_REPLIES_ALLOC(replies, ec->nodes); + xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); + if (!xattr) { + ret = -ENOMEM; + goto out; + } + dict = dict_new(); + if (!dict) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < ec->nodes; i++) { + xattr[i] = dict; + on[i] = 1; + } + ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, + ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64, + xattr, NULL); +out: + if (dict) { + dict_unref(dict); + } + if (xattr) { + GF_FREE(xattr); + } + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); + return ret; +} + void ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) { @@ -2498,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) intptr_t mbad = 0; intptr_t good = 0; intptr_t bad = 0; + uint32_t pending = 0; ec_fop_data_t *fop = data; gf_boolean_t blocking = _gf_false; ec_heal_need_t need_heal = EC_HEAL_NONEED; @@ -2533,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) if (loc->name && strlen(loc->name)) { ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, participants); - if (ret == 0) { + if (ret >= 0) { gf_msg_debug(this->name, 0, "%s: name heal " "successful on %" PRIXPTR, @@ -2551,23 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) /* Mount triggers heal only when it detects that it must need heal, shd * triggers heals periodically which need not be thorough*/ - if (ec->shd.iamshd) { + if (ec->shd.iamshd && (ret <= 0)) { ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, &need_heal); - if (need_heal == EC_HEAL_NONEED) { + if (need_heal == EC_HEAL_PURGE_INDEX) { + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "Index entry needs to be purged for: %s ", + uuid_utoa(loc->gfid)); + /* We need to send a zero-xattrop so that the stale index entry can + * be removed. We need not take a lock on this entry to do so, as + * xattrop on a brick is atomic. */
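+ /* Illustration (values taken from this patch, wording assumed): with + * EC_VERSION_SIZE == 2 the purge winds GF_XATTROP_ADD_ARRAY64 with a + * zeroed EC_XATTR_DIRTY array, i.e. trusted.ec.dirty = {0, 0}; adding + * zero leaves the on-disk versions untouched while still giving the + * index xlator the xattrop it needs to drop the stale entry. */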
+ ec_heal_purge_stale_index(frame, ec, loc->inode); + goto out; + } else if (need_heal == EC_HEAL_NONEED) { gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, "Heal is not required for : %s ", uuid_utoa(loc->gfid)); goto out; } } + sources = alloca0(ec->nodes); healed_sinks = alloca0(ec->nodes); if (IA_ISREG(loc->inode->ia_type)) { ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, healed_sinks); } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { - ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); + ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks, + &pending); } else { ret = 0; memcpy(sources, participants, ec->nodes); @@ -2597,10 +2677,11 @@ out: if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno, ec_char_array_to_mask(participants, ec->nodes), - mgood & good, mbad & bad, NULL); + mgood & good, mbad & bad, pending, NULL); } if (frame) STACK_DESTROY(frame->root); + return; } @@ -2648,7 +2729,7 @@ ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) { if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0, - 0, NULL); + 0, 0, NULL); } ec_fop_data_release(fop); } @@ -2835,7 +2916,7 @@ fail: if (fop) ec_fop_data_release(fop); if (func) - func(frame, data, this, -1, err, 0, 0, 0, NULL); + func(frame, data, this, -1, err, 0, 0, 0, 0, NULL); } int @@ -2964,6 +3045,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, goto out; } } + /* If the lock count is 0, all dirty flags are 0 and all the + * versions are matching, then why are we here? It looks + * like something went wrong while removing the index entries + * after completing a successful heal or fop. In this case + * we need to remove this index entry to avoid triggering heal + * in a loop and causing lookups again and again*/ + *need_heal = EC_HEAL_PURGE_INDEX; } else { for (i = 0; i < ec->nodes; i++) { /* Since each lock can only increment the dirty diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index 956e73c2088..5c1586bc9c5 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer) ec = healer->this->private; disabled_loop: - wait_till.tv_sec = time(NULL) + ec->shd.timeout; + wait_till.tv_sec = gf_time() + ec->shd.timeout; while (!healer->rerun) { ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till); @@ -156,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) return ret; } +static gf_boolean_t +ec_is_heal_completed(char *status) +{ + char *bad_pos = NULL; + char *zero_pos = NULL; + + if (!status) { + return _gf_false; + } + + /*Logic: + * Status will be of the form Good: <binary>, Bad: <binary> + * If heal has completed, the first '0' (strchr) must appear after + * the last ':' (strrchr), i.e. inside the 'Bad:' bitmap. + * */
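+ /* Worked example (status strings are illustrative): for "Good: 111, + * Bad: 000" the first '0' comes after the last ':', so the heal is + * complete; for "Good: 011, Bad: 100" the first '0' precedes the + * last ':', so it is not. */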
+ + zero_pos = strchr(status, '0'); + bad_pos = strrchr(status, ':'); + if (!zero_pos || !bad_pos) { + /*malformed status*/ + return _gf_false; + } + + if (zero_pos > bad_pos) { + return _gf_true; + } + + return _gf_false; +} + int ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, gf_boolean_t full) { + dict_t *xdata = NULL; + dict_t *dict = NULL; + uint32_t count; int32_t ret; + char *heal_status = NULL; + ec_t *ec = healer->this->private; + + GF_ATOMIC_INC(ec->stats.shd.attempted); + ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL, + &xdata); + if (ret == 0) { + if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) { + if (ec_is_heal_completed(heal_status)) { + GF_ATOMIC_INC(ec->stats.shd.completed); + } + } + } - ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); - if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { + if (!full && (loc->inode->ia_type == IA_IFDIR)) { /* If we have just healed a directory, it's possible that - * other index entries have appeared to be healed. We put a - * mark so that we can check it later and restart a scan - * without delay. */ - healer->rerun = _gf_true; + * other index entries have appeared to be healed. */ + if ((xdata != NULL) && + (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) && + (count > 0)) { + /* Force a rerun of the index healer. */ + gf_msg_debug(healer->this->name, 0, "%d more entries to heal", + count); + + healer->rerun = _gf_true; + } + } + + if (xdata != NULL) { + dict_unref(xdata); + } + + if (dict) { + dict_unref(dict); } return ret; diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index a891ccd0952..dad5f4d7018 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -390,7 +390,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, int32_t op_errno, uintptr_t mask, - uintptr_t good, uintptr_t bad, dict_t *xdata) + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) { fop_getxattr_cbk_t func = cookie; ec_t *ec = xl->private; @@ -398,6 +399,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, char *str; char bin1[65], bin2[65]; + /* We try to return the 'pending' information in xdata, but if this cannot + * be set, we will ignore it silently. We prefer to report the success or + * failure of the heal itself. */ + if (xdata == NULL) { + xdata = dict_new(); + } else { + dict_ref(xdata); + } + if (xdata != NULL) { + if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) { + /* dict_set_uint32() is marked as 'warn_unused_result' and gcc + * enforces to check the result in this case. However we don't + * really care if it succeeded or not. We'll just do the same. + * + * This empty 'if' avoids the warning, and it will be removed by + * the optimizer. 
*/ + } + } + if (op_ret >= 0) { dict = dict_new(); if (dict == NULL) { @@ -431,11 +451,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, } out: - func(frame, NULL, xl, op_ret, op_errno, dict, NULL); + func(frame, NULL, xl, op_ret, op_errno, dict, xdata); if (dict != NULL) { dict_unref(dict); } + if (xdata != NULL) { + dict_unref(xdata); + } return 0; } diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index 8e84977d2b3..601960d6154 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -24,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) ec_t *ec = fop->xl->private; ec_cbk_data_t *ans = NULL; ec_cbk_data_t *cbk = NULL; - uintptr_t locked = 0, notlocked = 0; + uintptr_t locked = 0; + int32_t good = 0; + int32_t eagain = 0; + int32_t estale = 0; int32_t error = -1; + /* There are some errors that we'll handle in a special way while trying + * to acquire a lock. + * + * EAGAIN: If it's found during a parallel non-blocking lock request, we + * consider that there's contention on the inode, so we consider + * the acquisition a failure and try again with a sequential + * blocking lock request. This will ensure that we get a lock on + * as many bricks as possible (ignoring EAGAIN here would cause + * unnecessary triggers of self-healing). + * + * If it's found during a sequential blocking lock request, it's + * considered an error. Lock will only succeed if there are + * enough other bricks locked. + * + * ESTALE: This can appear during a parallel or sequential lock request if + * the inode has just been unlinked. We consider this error + * unrecoverable, but we also don't consider it fatal. So, + * if it happens during a parallel lock, we won't attempt a + * sequential one unless there are EAGAIN errors on other + * bricks (and they are enough to form a quorum), but if we reach + * quorum counting the ESTALE bricks, we consider the whole + * result of the operation to be ESTALE instead of EIO. + */ + list_for_each_entry(ans, &fop->cbk_list, list) { if (ans->op_ret >= 0) { @@ -34,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) error = EIO; } locked |= ans->mask; + good = ans->count; cbk = ans; - } else { - if (ans->op_errno == EAGAIN) { - switch (fop->uint32) { - case EC_LOCK_MODE_NONE: - case EC_LOCK_MODE_ALL: - /* Goal is to treat non-blocking lock as failure - * even if there is a single EAGAIN*/ - notlocked |= ans->mask; - break; - } - } + } else if (ans->op_errno == ESTALE) { + estale += ans->count; + } else if ((ans->op_errno == EAGAIN) && + (fop->uint32 != EC_LOCK_MODE_INC)) { + eagain += ans->count; } } if (error == -1) { - if (gf_bits_count(locked | notlocked) >= ec->fragments) { - if (notlocked == 0) { + /* If we have enough quorum with succeeded and EAGAIN answers, we + * ignore for now any ESTALE answer. If there are EAGAIN answers, + * we retry with a sequential blocking lock request if needed. + * Otherwise we succeed. */ + if ((good + eagain) >= ec->fragments) { + if (eagain == 0) { if (fop->answer == NULL) { fop->answer = cbk; } @@ -64,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) case EC_LOCK_MODE_NONE: error = EAGAIN; break; - case EC_LOCK_MODE_ALL: fop->uint32 = EC_LOCK_MODE_INC; break; - default: + /* This shouldn't happen because eagain cannot be > 0 + * when fop->uint32 is EC_LOCK_MODE_INC. */ error = EIO; break; } } } else { - if (fop->answer && fop->answer->op_ret < 0) + /* We have been unable to find enough candidates that will be able + * to take the lock. If we have quorum on some answer, we return + * it. Otherwise we check if ESTALE answers allow us to reach + * quorum. If so, we return ESTALE. */
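+ /* Illustrative arithmetic (assuming a 4+2 volume, ec->fragments == 4): + * good = 3, eagain = 1 gives good + eagain >= 4, and since eagain > 0 + * under EC_LOCK_MODE_ALL the lock is retried sequentially with + * EC_LOCK_MODE_INC; good = 1, eagain = 0, estale = 3 gives + * good + eagain < 4 but good + eagain + estale >= 4, so the whole + * operation fails with ESTALE rather than EIO. */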
+ if (fop->answer && fop->answer->op_ret < 0) { error = fop->answer->op_errno; - else + } else if ((good + eagain + estale) >= ec->fragments) { + error = ESTALE; + } else { error = EIO; + } } } diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 7829b8c27b3..de9b89bb2c9 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t); enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX }; -enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST }; +enum _ec_heal_need { + EC_HEAL_NONEED, + EC_HEAL_MAYBE, + EC_HEAL_MUST, + EC_HEAL_PURGE_INDEX +}; enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL }; @@ -186,10 +191,10 @@ struct _ec_inode { typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, int32_t, uintptr_t, uintptr_t, uintptr_t, - dict_t *); + uint32_t, dict_t *); typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, int32_t, uintptr_t, uintptr_t, uintptr_t, - dict_t *); + uint32_t, dict_t *); union _ec_cbk { fop_access_cbk_t access; @@ -621,6 +626,11 @@ struct _ec_statistics { requests. (Basically memory allocation errors). */ } stripe_cache; + struct { + gf_atomic_t attempted; /*Number of heals attempted on + files/directories*/ + gf_atomic_t completed; /*Number of heals completed on files/directories*/ + } shd; }; struct _ec { diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 66b4e634911..7344be4968d 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec) void ec_up(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 1; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, + "Going UP : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name); } @@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec) void ec_down(xlator_t *this, ec_t *ec) { + char str1[32], str2[32]; + if (ec->timer != NULL) { gf_timer_call_cancel(this->ctx, ec->timer); ec->timer = NULL; } ec->up = 0; - gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN"); + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, + "Going DOWN : Child UP = %s Child Notify = %s", + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes)); gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name); } @@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec) GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0); GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); + GF_ATOMIC_INIT(ec->stats.shd.attempted, 0); + GF_ATOMIC_INIT(ec->stats.shd.completed, 0); } static int @@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this) GF_ATOMIC_GET(ec->stats.stripe_cache.allocs)); gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC, 
GF_ATOMIC_GET(ec->stats.stripe_cache.errors)); + gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.attempted)); + gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(ec->stats.shd.completed)); return 0; } diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index 1b210d9adc1..6f6de6d5981 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -18,6 +18,7 @@ #define EC_XATTR_SIZE EC_XATTR_PREFIX "size" #define EC_XATTR_VERSION EC_XATTR_PREFIX "version" #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" +#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" #define EC_STRIPE_CACHE_MAX_SIZE 10 #define EC_VERSION_SIZE 2
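Note: the two heal callback typedefs in ec-types.h now carry a uint32_t 'pending' argument: the number of entries that name heal newly created and still expects to be healed, which ec_getxattr_heal_cbk() forwards in xdata under EC_XATTR_HEAL_NEW so the self-heal daemon can rerun its index scan. A minimal sketch of a callback matching the new fop_heal_cbk_t signature; the function name and log text are illustrative, not part of the patch:

    static int32_t
    example_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
                     int32_t op_ret, int32_t op_errno, uintptr_t mask,
                     uintptr_t good, uintptr_t bad, uint32_t pending,
                     dict_t *xdata)
    {
        /* 'pending' > 0 means entry heal created names that still need
         * data/metadata heal, so a caller would schedule another pass. */
        if ((op_ret >= 0) && (pending > 0))
            gf_msg_debug(this->name, 0, "%u entries still need heal", pending);
        return 0;
    }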