summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-common.c317
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c15
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c14
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c11
-rw-r--r--xlators/cluster/afr/src/afr-open.c8
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c10
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c199
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c22
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c211
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c62
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c41
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h14
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c193
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h2
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c152
-rw-r--r--xlators/cluster/afr/src/afr.c49
-rw-r--r--xlators/cluster/afr/src/afr.h29
-rw-r--r--xlators/cluster/dht/src/dht-common.c415
-rw-r--r--xlators/cluster/dht/src/dht-common.h173
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c12
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c3
-rw-r--r--xlators/cluster/dht/src/dht-helper.c176
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c50
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c56
-rw-r--r--xlators/cluster/dht/src/dht-layout.c84
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c50
-rw-r--r--xlators/cluster/dht/src/dht-lock.c190
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h3
-rw-r--r--xlators/cluster/dht/src/dht-messages.h320
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1153
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c583
-rw-r--r--xlators/cluster/dht/src/dht-shared.c132
-rw-r--r--xlators/cluster/dht/src/nufa.c1
-rw-r--r--xlators/cluster/ec/src/ec-combine.c24
-rw-r--r--xlators/cluster/ec/src/ec-common.c80
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c11
-rw-r--r--xlators/cluster/ec/src/ec-heal.c124
-rw-r--r--xlators/cluster/ec/src/ec-heald.c73
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c27
-rw-r--r--xlators/cluster/ec/src/ec-locks.c69
-rw-r--r--xlators/cluster/ec/src/ec-types.h16
-rw-r--r--xlators/cluster/ec/src/ec.c20
-rw-r--r--xlators/cluster/ec/src/ec.h1
43 files changed, 2722 insertions, 2473 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a2f0b2a..032ab5c 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -45,7 +45,42 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
-static void
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+void
afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
unsigned char *replies)
{
@@ -885,7 +920,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
}
break;
@@ -898,7 +933,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
datamap |= (1 << index);
}
if (datamap_old != datamap)
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
break;
default:
@@ -1062,34 +1097,6 @@ out:
}
int
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
-{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- return ret;
-
- val = ctx->read_subvol;
-
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
-
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
- (((uint64_t)event) << 32);
-
- ctx->read_subvol = val;
-
- return ret;
-}
-
-int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -1160,22 +1167,6 @@ out:
}
int
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->child_count <= 16)
- ret = __afr_inode_event_gen_reset_small(inode, this);
- else
- ret = -1;
-
- return ret;
-}
-
-int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -1241,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -1258,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -1324,30 +1348,22 @@ out:
return need_refresh;
}
-static int
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
- LOCK(&inode->lock);
- {
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- goto unlock;
-
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
ctx->need_refresh = _gf_true;
}
-unlock:
- UNLOCK(&inode->lock);
-out:
+
return ret;
}
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
@@ -1355,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
LOCK(&inode->lock);
{
- ret = __afr_inode_event_gen_reset(inode, this);
+ ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
@@ -1790,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
+ if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
@@ -2290,8 +2306,9 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
@@ -2875,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2885,10 +2902,10 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
+ *read_subvol = spb_subvol;
} else if (!priv->quorum_count ||
frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
@@ -2929,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
0,
};
gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
ia_type_t ia_type = IA_INVAL;
@@ -2972,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (!replies[i].valid)
continue;
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto error;
- }
-
- if (replies[i].op_ret == -1)
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
continue;
+ }
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
@@ -2992,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
}
}
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
if (read_subvol == -1)
goto error;
/* We now have a read_subvol, which is readable[] (if there
@@ -3050,7 +3069,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
@@ -3103,7 +3122,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
*/
int
@@ -3115,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
@@ -3606,6 +3627,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int read_subvol = -1;
+ int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;
@@ -3627,7 +3649,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (!afr_has_quorum(success_replies, this, frame))
goto unwind;
- afr_replies_interpret(frame, this, local->inode, NULL);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
@@ -3679,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3888,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
- else
- afr_discover_do(frame, this, 0);
+ afr_discover_do(frame, this, 0);
return 0;
out:
@@ -3993,11 +4014,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -4033,11 +4053,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
- else
- afr_lookup_do(frame, this, 0);
+ afr_lookup_do(frame, this, 0);
return 0;
out:
@@ -5665,6 +5681,8 @@ afr_priv_dump(xlator_t *this)
GF_ATOMIC_GET(priv->pending_reads[i]));
sprintf(key, "child_latency[%d]", i);
gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
}
gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
@@ -5677,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -5737,13 +5756,31 @@ __afr_get_up_children_count(afr_private_t *priv)
return up_children;
}
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
+
glusterfs_event_t
-__afr_transform_event_from_state(afr_private_t *priv)
+__afr_transform_event_from_state(xlator_t *this)
{
int i = 0;
int up_children = 0;
+ afr_private_t *priv = this->private;
- if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count)
+ if (__get_heard_from_all_status(this))
/* have_heard_from_all. Let afr_notify() do the propagation. */
return GF_EVENT_MAXVAL;
@@ -5785,7 +5822,7 @@ afr_notify_cbk(void *data)
goto unlock;
}
priv->timer = NULL;
- event = __afr_transform_event_from_state(priv);
+ event = __afr_transform_event_from_state(this);
if (event != GF_EVENT_MAXVAL)
propagate = _gf_true;
}
@@ -5813,20 +5850,6 @@ __afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
}
static int
-__get_heard_from_all_status(xlator_t *this)
-{
- afr_private_t *priv = this->private;
- int i;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- return 0;
- }
- }
- return 1;
-}
-
-static int
find_best_down_child(xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -5837,7 +5860,7 @@ find_best_down_child(xlator_t *this)
priv = this->private;
for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
priv->child_latency[i] < best_latency) {
best_child = i;
best_latency = priv->child_latency[i];
@@ -5909,7 +5932,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child down.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_DOWN;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
}
} else if (child_latency_msec < halo_max_latency_msec &&
priv->child_up[idx] == 0) {
@@ -5921,7 +5946,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child up.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_UP;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
} else {
gf_log(child_xlator->name, GF_LOG_INFO,
"Not marking child %d up, "
@@ -5988,7 +6015,10 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
if (child_latency_msec < 0) {
/*set to INT64_MAX-1 so that it is found for best_down_child*/
- priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
}
/*
@@ -6077,6 +6107,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*/
if (child_latency_msec < 0) {
priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
}
priv->child_up[idx] = 0;
@@ -6641,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv)
if (!priv)
goto out;
+
+ GF_FREE(priv->sh_domain);
GF_FREE(priv->last_event);
child_count = priv->child_count;
@@ -6656,7 +6689,9 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
LOCK_DESTROY(&priv->lock);
@@ -6794,8 +6829,8 @@ afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
static int
afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
- char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh,
- gf_boolean_t *dsh, gf_boolean_t *msh)
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
{
int ret = -1;
GF_UNUSED int ret1 = 0;
@@ -6825,14 +6860,7 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
}
}
- if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) {
- if (shd_domain_lk_count) {
- ret = -EAGAIN; /*For 'possibly-healing'. */
- } else {
- ret = 0; /*needs heal. Just set a non -ve value so that it is
- assumed as the source index.*/
- }
- } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) {
+ if (!pending) {
if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
(!io_domain_lk_count)) {
/* Needs heal. */
@@ -6841,6 +6869,13 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
/* No heal needed. */
*dsh = *esh = *msh = 0;
}
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
+ }
}
return ret;
}
@@ -6848,8 +6883,8 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
/*return EIO, EAGAIN or pending*/
int
afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode, char *index_vgfid,
- gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal, unsigned char *pending)
{
int ret = -1;
@@ -6908,8 +6943,8 @@ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
goto out;
}
- ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type,
- &esh, &dsh, &msh);
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
*data_selfheal = dsh;
*entry_selfheal = esh;
@@ -6934,14 +6969,6 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
char *status = NULL;
call_frame_t *heal_frame = NULL;
afr_local_t *heal_local = NULL;
- afr_local_t *local = NULL;
- char *index_vgfid = NULL;
-
- local = frame->local;
- if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) {
- ret = -1;
- goto out;
- }
/*Use frame with lk-owner set*/
heal_frame = afr_frame_create(frame->this, &op_errno);
@@ -6952,7 +6979,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
heal_local = heal_frame->local;
heal_frame->local = frame->local;
- ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode, index_vgfid,
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
&entry_selfheal, &data_selfheal,
&metadata_selfheal, &pending);
@@ -7427,7 +7454,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto data_unlock;
ret = __afr_selfheal_data_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -7444,7 +7471,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
LLONG_MAX - 1, 0, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto mdata_unlock;
ret = __afr_selfheal_metadata_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 74f71fd..f8bf834 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -67,7 +67,8 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -163,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -182,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -227,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index e96b7d0..b7cceb7 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -119,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
- afr_inode_event_gen_reset(local->parent, this);
+ afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
- afr_inode_event_gen_reset(local->parent2, this);
+ afr_inode_need_refresh_set(local->parent2, this);
continue;
}
@@ -345,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int pre_op_count = 0;
int failed_count = 0;
+ unsigned char *success_replies = NULL;
local = frame->local;
priv = this->private;
@@ -360,9 +361,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
failed_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
+ /* FOP succeeded on all bricks. */
if (pre_op_count == priv->child_count && !failed_count)
return;
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+
if (priv->thin_arbiter_count) {
/*Mark new entry using ta file*/
local->is_new_entry = _gf_true;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c01b413..1d6e4f3 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -2506,6 +2506,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
@@ -2516,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index a5b004f..6485604 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -137,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -179,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 772b59f..6fc2c75 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -164,7 +164,7 @@ afr_ta_read_txn(void *opaque)
xdata_rsp = NULL;
/* It doesn't. So query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -272,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -303,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 16f19e7..a580a15 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -140,7 +140,7 @@ heal:
}
}
out:
- if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
ret = -afr_final_errno(local, priv);
}
loc_wipe(&loc);
@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
char *accused = NULL; /* Accused others without any self-accusal */
char *pending = NULL; /* Have pending operations on others */
char *self_accused = NULL; /* Accused itself */
- int min_participants = -1;
priv = this->private;
@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
- min_participants = priv->child_count;
- } else {
- min_participants = AFR_SH_MIN_PARTICIPANTS;
- }
- if (afr_success_count(replies, priv->child_count) < min_participants) {
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
/* Treat this just like locks not being acquired */
return -ENOTCONN;
}
@@ -1911,17 +1905,16 @@ int
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
dict_t *dict = NULL;
- priv = frame->this->private;
local = frame->local;
- if (local && local->xattr_req)
+
+ if (local->xattr_req)
dict = local->xattr_req;
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
- priv->child_up, dict);
+ local->child_up, dict);
}
unsigned int
@@ -2757,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index cdff4a5..37bcc2b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -225,24 +225,40 @@ __afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
+{
+ afr_private_t *priv = this->private;
+ int i = 0;
+
+ if (!locked_on[source])
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
static int
afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
size_t size, int type, struct afr_reply *replies)
{
int ret = -1;
- int sink_count = 0;
afr_private_t *priv = NULL;
unsigned char *data_lock = NULL;
priv = this->private;
- sink_count = AFR_COUNT(healed_sinks, priv->child_count);
data_lock = alloca0(priv->child_count);
ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
data_lock);
{
- if (ret < sink_count) {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
ret = -ENOTCONN;
goto unlock;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index e706228..64893f4 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -16,54 +16,170 @@
#include <glusterfs/syncop-utils.h>
#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
- char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0};
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source,
+ iatt->ia_type, iatt_uuid_str, source,
uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
@@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -166,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
@@ -598,7 +735,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes "
@@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
@@ -992,7 +1125,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
@@ -1116,7 +1249,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
NULL, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index f4e31b6..03f43ba 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -190,6 +190,59 @@ out:
return ret;
}
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
+{
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
/*
* Look for mismatching uid/gid or mode or user xattrs even if
* AFR xattrs don't say so, and pick one arbitrarily as winner. */
@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
};
int source = -1;
int sources_count = 0;
+ int ret = 0;
priv = this->private;
@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
healed_sinks[i] = 1;
}
}
-
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
return source;
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 36640b54..834aac8 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
@@ -381,7 +352,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
is_gfid_absent, &gfid_idx);
- if (ret)
+ if (ret || (gfid_idx < 0))
return ret;
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
@@ -514,7 +485,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -560,13 +531,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
struct afr_reply *replies = NULL;
inode_t *inode = NULL;
int first_idx = -1;
+ afr_local_t *local = NULL;
priv = this->private;
+ local = frame->local;
replies = alloca0(sizeof(*replies) * priv->child_count);
inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
- priv->child_up, NULL);
+ local->child_up, NULL);
if (!inode)
return -ENOMEM;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 5e7bde8..48e6dbc 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -11,8 +11,6 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
#define AFR_ONALL(frame, rfn, fop, args...) \
@@ -47,13 +45,16 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0; \
- int __count = AFR_COUNT(list, __priv->child_count); \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
\
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
__local->barrier.waitfor = __count; \
afr_local_replies_wipe(__local, __priv); \
\
for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!list[__i]) \
+ if (!__list[__i]) \
continue; \
STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
__priv->children[__i], \
@@ -368,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index aa774bb..109fd4b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -94,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -222,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -371,7 +371,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
_mask_cancellation();
}
@@ -386,7 +386,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
+ event->end_time = gf_time();
history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
@@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -843,6 +843,176 @@ out:
return need_heal;
}
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
if (ret == 0 && pre_crawl_xdata &&
!healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
@@ -1481,15 +1655,6 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_INDEX_SUMMARY:
/* this case has been handled in glfs-heal.c */
break;
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
- for (i = 0; i < priv->child_count; i++) {
- keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
- SOP_NOT_SUPPORTED,
- SLEN(SOP_NOT_SUPPORTED));
- }
- break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
eh_dump(shd->split_brain, output, afr_add_shd_event);
break;
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 687c28e..18db728 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 78438f9..a51f79b 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -124,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -521,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -971,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1026,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
@@ -1073,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1081,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1090,86 +1051,6 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
-{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
-
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
-
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
- }
-
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
-
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
-
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
-
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
- }
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
-
- return ret;
-}
-
static int
afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
@@ -1264,9 +1145,9 @@ afr_ta_post_op_do(void *opaque)
this = local->transaction.frame->this;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -2466,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ec7aa22..df7366f 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
}
}
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -168,7 +189,8 @@ reconfigure(xlator_t *this, dict_t *options)
bool, out);
GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
out);
@@ -289,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
if (priv->shd.enabled) {
if ((priv->shd.enabled != enabled_old) ||
(timeout_old != priv->shd.timeout))
@@ -485,7 +511,8 @@ init(xlator_t *this)
GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
- gf_string2boolean(data_self_heal, &priv->data_self_heal);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
out);
@@ -539,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -551,13 +580,19 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -1282,6 +1317,14 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 8845656..d62f9a9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -42,6 +42,7 @@
#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -190,7 +191,9 @@ typedef struct _afr_private {
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
+ unsigned char *halo_child_up;
int64_t *child_latency;
unsigned char *local;
@@ -274,10 +277,15 @@ typedef struct _afr_private {
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
/*For lock healing.*/
struct list_head saved_locks;
struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -900,7 +908,7 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
@@ -996,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
@@ -1267,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1353,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1401,4 +1412,12 @@ afr_is_lock_mode_mandatory(dict_t *xdata);
void
afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index b762f1a..8ba0cc4 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -17,6 +17,7 @@
#include <glusterfs/quota-common-utils.h>
#include <glusterfs/upcall-utils.h>
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
@@ -43,15 +44,6 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
static int
dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- GF_XATTR_MDATA_KEY,
- NULL};
-
static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
/* Check the xdata to make sure EBADF has been set by client xlator */
@@ -84,6 +76,8 @@ dht_set_fixed_dir_stat(struct iatt *stat)
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
@@ -388,7 +382,7 @@ out:
/* Code to save hashed subvol on inode ctx as a mds subvol
*/
-static int
+int
dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
{
dht_inode_ctx_t *ctx = NULL;
@@ -619,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -695,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_conf_t *conf = 0;
dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
@@ -702,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
conf = this->private;
layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
if (op_ret) {
gf_msg_debug(this->name, op_ret,
@@ -722,7 +719,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
layout);
}
out:
- if (local && local->mds_heal_fresh_lookup)
+ if (mds_heal_fresh_lookup)
DHT_STACK_DESTROY(frame);
return 0;
}
@@ -1256,7 +1253,7 @@ err:
to non hashed subvol
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1268,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1281,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1295,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
@@ -1435,15 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_aggregate_xattr(local->xattr, xattr);
}
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
+ }
+
if (local->stbuf.ia_type != IA_INVAL) {
/* This is not the first subvol to respond
* Compare values to see if attrs need to be healed
*/
- if (!__is_root_gfid(stbuf->ia_gfid) &&
- ((local->stbuf.ia_gid != stbuf->ia_gid) ||
- (local->stbuf.ia_uid != stbuf->ia_uid) ||
- (is_permission_different(&local->stbuf.ia_prot,
- &stbuf->ia_prot)))) {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
local->need_attrheal = 1;
}
}
@@ -1635,7 +1651,7 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
int32_t check_mds = 0;
- int errst = 0;
+ int errst = 0, i = 0;
int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -1702,6 +1718,14 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->need_lookup_everywhere = 1;
} else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
+ }
+ }
+
local->need_selfheal = 1;
}
}
@@ -2137,31 +2161,18 @@ static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
-
- ret = dict_set_int32(dict, linktoskip_key, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
static int32_t
@@ -4290,6 +4301,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
@@ -4580,18 +4593,8 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dict_del(xattr, conf->xattr_name);
dict_del(xattr, conf->mds_xattr_key);
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
dict_del(xattr, conf->commithash_xattr_name);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
-
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
@@ -4865,6 +4868,60 @@ out:
return 0;
}
+/* Virtual Xattr which returns 1 if all subvols are up,
+ else returns 0. Geo-rep then uses this virtual xattr
+ after a fresh mount and starts the I/O.
+*/
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
int
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
dict_t *xdata)
@@ -4922,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
goto err;
}
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
if (!DHT_IS_DIR(layout))
goto no_dht_is_dir;
@@ -5371,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
@@ -5808,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -6635,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
if (layout == NULL)
goto done;
@@ -8282,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
local = frame->local;
if (!local) {
@@ -8290,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
+
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
+ }
+
goto out;
+ }
prev = cookie;
@@ -8412,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8420,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
if (avail_subvol != subvol) {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
@@ -8439,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8654,7 +8768,7 @@ err:
return 0;
}
-static int32_t
+int32_t
dht_create_lock(call_frame_t *frame, xlator_t *subvol)
{
dht_local_t *local = NULL;
@@ -8700,6 +8814,60 @@ err:
}
int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
+
+int
dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
@@ -8725,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
"creating %s on %s (got create on %s)", local->loc.path,
@@ -8740,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (hashed_subvol && (hashed_subvol != subvol)) {
/* Create the linkto file and then the data file */
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = subvol;
local->hashed_subvol = hashed_subvol;
@@ -8756,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
* file as we expect a lookup everywhere if there are problems
* with the parent layout
*/
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, &local->loc, flags, mode, umask,
fd, params);
@@ -8807,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
-
loc_wipe(&local->loc);
ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
@@ -10646,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10692,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -10805,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
if (defrag->is_exiting)
goto unlock;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
gf_defrag_status_get(conf, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
else if (cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
- else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
- ret = gf_defrag_pause_tier(this, defrag);
- else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
- ret = gf_defrag_resume_tier(this, defrag);
}
unlock:
UNLOCK(&defrag->lock);
@@ -10894,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -11047,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more then once during a FOP
handling path. It is valid only for for ops on files
@@ -11103,14 +11231,6 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
@@ -11250,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
return 0;
}
+
+/* The job of this function is to check if all the xlators have updated
+ * error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 288bdf7..fe0dc3d 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -24,7 +24,6 @@
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -36,22 +35,21 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
+/* Virtual xattr for subvols status */
+
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
+
/* Virtual xattrs for debugging */
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
@@ -242,19 +240,6 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
loc_t loc;
loc_t loc2;
@@ -272,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -405,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
@@ -463,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- int percent_full;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- tier_pause_state_t pause_state;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 is this is my node's uuid*/
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -559,17 +467,10 @@ struct gf_defrag_info_ {
int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
+ time_t start_time;
uint32_t new_commit_hash;
gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -605,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -626,7 +526,7 @@ struct dht_conf {
int subvolume_cnt;
int32_t refresh_interval;
gf_lock_t subvolume_lock;
- struct timeval last_stat_fetch;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
@@ -748,6 +648,8 @@ struct dir_dfmeta {
struct list_head **head;
struct list_head **iterator;
int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
};
typedef struct dht_migrate_info {
@@ -879,7 +781,6 @@ dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
int
dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr);
-
xlator_t *
dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
dict_t *xattr);
@@ -897,9 +798,6 @@ int
dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
int32_t **disk_layout_p);
int
-dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
- void *disk_layout_raw, int disk_layout_len);
-int
dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, int32_t **disk_layout_p);
@@ -945,7 +843,7 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
int
dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-int
+void
dht_layout_sort_volname(dht_layout_t *layout);
int
@@ -962,14 +860,14 @@ dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
int
dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
int
-dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
-int
dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
;
void
dht_layout_unref(xlator_t *this, dht_layout_t *layout);
dht_layout_t *
dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
xlator_t *
dht_first_up_subvol(xlator_t *this);
xlator_t *
@@ -1236,30 +1134,8 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata);
int
-dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-int
gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf);
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *defrag);
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag);
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
@@ -1291,10 +1167,6 @@ int
dht_dir_attr_heal(void *data);
int
dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
-int
-dht_dir_has_layout(dict_t *xattr, char *name);
-gf_boolean_t
-dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
xlator_t *
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
xlator_t *ignore, dht_layout_t *layout,
@@ -1303,15 +1175,18 @@ xlator_t *
dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
dht_layout_t *layout);
int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
-void
-dht_layout_dump(dht_layout_t *layout, const char *prefix);
int32_t
dht_priv_dump(xlator_t *this);
int32_t
dht_inodectx_dump(xlator_t *this, inode_t *inode);
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
int
dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
xlator_t **src_subvol, xlator_t **dst_subvol);
@@ -1341,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1456,7 +1328,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int
dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
@@ -1498,4 +1370,15 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata);
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 27097ca..c058882 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -151,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -198,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 1dfe2a7..acda67c 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -78,6 +78,9 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
priv = this->private;
+ if (name == NULL)
+ return -1;
+
len = strlen(name) + 1;
rsync_friendly_name = alloca(len);
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 023d437..3f2fe43 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -64,8 +64,8 @@ __dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
ret = __fd_ctx_set(fd, this, value);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
- "Failed to set fd ctx in fd=0x%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
GF_REF_PUT(fd_ctx);
}
out:
@@ -96,8 +96,8 @@ dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
/* Overwrite and hope for the best*/
fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
UNLOCK(&fd->lock);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
- "Different dst found in the fd ctx");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
goto out;
}
@@ -383,9 +383,9 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
&local->rebalance.flock, local->xattr_req);
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -458,9 +458,9 @@ handle_err:
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -513,10 +513,9 @@ dht_check_and_open_fd_on_subvol_task(void *data)
fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "Failed to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
/* This can happen if the cached subvol was updated in the
* inode_ctx and the fd was opened on the new cached suvol
* after this fop was wound on the old cached subvol.
@@ -562,10 +561,8 @@ dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
dht_check_and_open_fd_on_subvol_complete, frame, frame);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0,
- "Failed to create synctask"
- " to check and open fd=%p",
- local->fd);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
}
return ret;
@@ -674,9 +671,7 @@ dht_get_subvol_from_id(xlator_t *this, int client_id)
ret = gf_asprintf(&sid, "%d", client_id);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED,
- "asprintf failed while "
- "fetching subvol from the id");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
goto out;
}
@@ -1336,9 +1331,9 @@ dht_migration_complete_check_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
@@ -1359,10 +1354,9 @@ dht_migration_complete_check_task(void *data)
ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- this->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
local->op_errno = -ret;
ret = -1;
goto out;
@@ -1370,18 +1364,15 @@ dht_migration_complete_check_task(void *data)
dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
if (linkto_target && dst_node != linkto_target) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
- "linkto target (%s) is "
- "different from cached-subvol (%s). Treating %s as "
- "destination subvol",
- linkto_target->name, dst_node->name, dst_node->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
ret = -1;
local->op_errno = EIO;
goto out;
@@ -1463,12 +1454,10 @@ dht_migration_complete_check_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed"
- " to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
open_failed = 1;
local->op_errno = -ret;
@@ -1622,9 +1611,9 @@ dht_rebalance_inprogress_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
}
@@ -1633,17 +1622,16 @@ dht_rebalance_inprogress_task(void *data)
}
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
- "%s: failed to get the 'linkto' xattr", local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
if (!dst_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
@@ -1660,20 +1648,17 @@ dht_rebalance_inprogress_task(void *data)
/* lookup on dst */
ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
@@ -1741,11 +1726,10 @@ dht_rebalance_inprogress_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed to send open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
ret = -1;
open_failed = 1;
} else {
@@ -1777,9 +1761,8 @@ unlock:
ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "%s: failed to set inode-ctx target file at %s", local->loc.path,
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
goto out;
}
@@ -2001,10 +1984,9 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Healing of path %s failed on subvolume %s for "
- "directory %s",
- path, this->name, bname);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
goto out;
}
@@ -2062,10 +2044,8 @@ dht_heal_full_path(void *data)
ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Failed to get path from subvol %s. Aborting "
- "directory healing.",
- source->name);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
goto out;
}
@@ -2103,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2112,10 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s ", local->loc.path);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
@@ -2203,8 +2186,8 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
if (ret) {
gf_uuid_unparse(inode->gfid, gfid);
UNLOCK(&inode->lock);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set lock_subvol in inode ctx for gfid %s", gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
goto post_unlock;
}
subvol = cached_subvol;
@@ -2234,8 +2217,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode = local->loc.inode ? local->loc.inode : local->fd->inode;
}
if (!inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Found a NULL inode. Failed to unref the inode");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
goto out;
}
@@ -2261,11 +2244,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode_unref(inode);
} else {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Unlock request failed for gfid %s."
- "Failed to unref the inode",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
goto out;
}
default:
@@ -2287,12 +2267,11 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
- "src or dst is NULL. Failed to set "
- " dictionary value for path %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
return;
}
/* Check if any user xattr present in src dict and set
@@ -2303,17 +2282,18 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it to dst dict, here index start from 1 because
user xattr already checked in previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
luflag = 1;
ret = dict_set(dst, xattrs_to_heal[i], keyval);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- xattrs_to_heal[i], local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
keyval = NULL;
}
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 8007fc7..dbb8070 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -10,25 +10,25 @@
#include "dht-common.h"
-int
+static int
dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret);
-int
+static int
dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -67,7 +67,7 @@ out:
return 0;
}
-int
+static int
dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -216,7 +216,7 @@ err:
return 0;
}
-int
+static int
dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -258,7 +258,7 @@ out:
return 0;
}
-int
+static int
dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *stbuf, dict_t *xdata)
{
@@ -473,7 +473,7 @@ out:
return 0;
}
-int
+static int
dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -558,7 +558,7 @@ err:
return 0;
}
-int
+static int
dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -606,7 +606,7 @@ out:
return 0;
}
-int
+static int
dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -735,7 +735,7 @@ out:
return 0;
}
-int
+static int
dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -881,7 +881,7 @@ out:
return 0;
}
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -959,7 +959,7 @@ err:
/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
indicate that lock migration happened on the fd, so we can consider it as
phase 2 of migration */
-int
+static int
dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_flock *flock, dict_t *xdata)
{
@@ -1006,7 +1006,7 @@ out:
return 0;
}
-int
+static int
dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1087,7 +1087,7 @@ err:
return 0;
}
-int
+static int
dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_lease *lease, dict_t *xdata)
{
@@ -1129,7 +1129,7 @@ err:
}
/* Symlinks are currently not migrated, so no need for any check here */
-int
+static int
dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, const char *path, struct iatt *stbuf,
dict_t *xdata)
@@ -1305,7 +1305,7 @@ out:
return 0;
}
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret)
{
@@ -1364,7 +1364,7 @@ out:
return 0;
}
-int
+static int
dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1452,7 +1452,7 @@ err:
return 0;
}
-int
+static int
dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1525,7 +1525,7 @@ err:
* below fops, hence not implementing 'migration' related checks
*/
-int
+static int
dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index b6b349d..2f23ce9 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -10,17 +10,17 @@
#include "dht-common.h"
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
@@ -142,7 +140,7 @@ out:
return 0;
}
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -336,7 +334,7 @@ err:
return 0;
}
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -555,7 +553,7 @@ err:
return 0;
}
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -731,7 +729,7 @@ err:
return 0;
}
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -902,7 +900,7 @@ err:
return 0;
}
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1049,7 +1047,7 @@ out:
return 0;
}
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index dc14d72..fda904c 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -131,9 +131,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
ret = dht_hash_compute(this, layout->type, name, &hash);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
- "hash computation failed for type=%d name=%s", layout->type,
- name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
goto out;
}
@@ -145,8 +144,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "no subvolume for hash (value) = 0x%x", hash);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
}
out:
@@ -255,7 +254,7 @@ dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
return dht_disk_layout_extract(this, layout, i, disk_layout_p);
}
-int
+static int
dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
void *disk_layout_raw, int disk_layout_len)
{
@@ -266,8 +265,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
int disk_layout[4];
if (!disk_layout_raw) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "error no layout on disk for merge");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
return -1;
}
@@ -284,10 +283,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
case DHT_HASH_TYPE_DM:
break;
default:
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: "
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
return -1;
}
@@ -355,8 +352,8 @@ dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
disk_layout_len);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "layout merge from subvolume %s failed", subvol->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
goto out;
}
@@ -415,8 +412,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j)
layout->list[j].start = start_swap;
layout->list[j].stop = stop_swap;
}
-
-int64_t
+static int64_t
dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
@@ -439,7 +435,7 @@ dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
return _gf_false;
}
-int64_t
+static int64_t
dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
@@ -475,7 +471,7 @@ dht_layout_sort(dht_layout_t *layout)
return 0;
}
-int
+void
dht_layout_sort_volname(dht_layout_t *layout)
{
int i = 0;
@@ -491,8 +487,6 @@ dht_layout_sort_volname(dht_layout_t *layout)
dht_layout_entry_swap(layout, i, j);
}
}
-
- return 0;
}
void
@@ -625,8 +619,8 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_sort(layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sort failed?! how the ....");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
goto out;
}
@@ -642,10 +636,9 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
" gfid = %s",
loc->path, gfid);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
- "Found anomalies in %s (gfid = %s). "
- "Holes=%d overlaps=%d",
- loc->path, gfid, holes, overlaps);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
}
ret = -1;
}
@@ -712,12 +705,11 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (!xattr) {
if (err == 0) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "%s: xattr dictionary is NULL", loc->path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "path not found: "
- "xattr dictionary is NULL");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
}
ret = -1;
}
@@ -729,13 +721,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (dict_ret < 0) {
if (err == 0 && layout->list[pos].stop) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "%s: Disk layout missing, gfid = %s", loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "path not found: "
- "Disk layout missing, gfid = %s",
- gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
}
ret = -1;
}
@@ -751,12 +743,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if ((layout->list[pos].start != start_off) ||
(layout->list[pos].stop != stop_off) ||
(layout->list[pos].commit_hash != commit_hash)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO,
- "subvol: %s; inode layout: 0x%x - 0x%x, 0x%x; "
- "disk layout: 0x%x - 0x%x, 0x%x",
- layout->list[pos].xlator->name, layout->list[pos].start,
- layout->list[pos].stop, layout->list[pos].commit_hash, start_off,
- stop_off, commit_hash);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
ret = 1;
} else {
ret = 0;
@@ -778,9 +771,8 @@ dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
layout = dht_layout_for_subvol(this, subvol);
if (!layout) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
ret = -1;
goto out;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 2f4e181..89ec6cc 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -11,7 +11,7 @@
#include <glusterfs/compat.h>
#include "dht-common.h"
-int
+static int
dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -34,17 +34,16 @@ dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
- "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
out:
local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
stbuf, postparent, postparent, xattr);
return 0;
}
-#define is_equal(a, b) ((a) == (b))
-int
+static int
dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -73,9 +72,8 @@ dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value. key : %s",
- conf->link_xattr_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
goto out;
}
@@ -125,27 +123,23 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: "
- "key = gfid-req, gfid = %s ",
- loc->path, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
gf_uuid_unparse(loc->gfid, gfid);
}
ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
- "%s: failed to initialize linkfile data, gfid = %s", loc->path,
- gfid);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -186,10 +180,9 @@ dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
- "Unlinking linkfile %s (gfid = %s)on "
- "subvolume %s failed ",
- local->loc.path, gfid, subvol->name);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -257,7 +250,7 @@ out:
return subvol;
}
-int
+static int
dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
struct iatt *statpost, dict_t *xdata)
@@ -269,10 +262,9 @@ dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
loc = &local->loc;
if (op_ret)
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
- "Failed to set attr uid/gid on %s"
- " :<gfid:%s> ",
- (loc->path ? loc->path : "NULL"), uuid_utoa(local->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
DHT_STACK_DESTROY(frame);
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index f9bac4f..638821c 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -44,7 +44,8 @@ dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
if (!lk_buf)
goto out;
- gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf);
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
GF_FREE(lk_buf);
}
@@ -313,11 +314,9 @@ dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
}
DHT_STACK_DESTROY(frame);
@@ -339,9 +338,10 @@ dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
}
@@ -375,9 +375,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -385,9 +385,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -446,21 +446,17 @@ dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -700,9 +696,10 @@ dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
gfid);
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
}
@@ -727,11 +724,9 @@ dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -762,9 +757,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -772,9 +767,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -834,21 +829,17 @@ dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -1039,13 +1030,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1060,13 +1050,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1077,11 +1066,11 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(
+ gf_smsg(
this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s, gfid:%s",
+ "subvol=%s",
local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
- gfid);
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
}
@@ -1153,19 +1142,16 @@ dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
- "memory allocation failed for lock_frame. gfid:%s"
- " path:%s",
- gfid, tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
- "dht_local_lock_init failed, gfid: %s path:%s", gfid,
- tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
@@ -1246,11 +1232,10 @@ dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_ret = -1;
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_ENTRYLK_ERROR,
- "%s (%s/%s): "
- "dht_blocking_entrylk failed after taking inodelk",
- gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
goto err;
}
@@ -1310,10 +1295,9 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
- "gfid:%s (name:%s) (path: %s): "
- "parent loc build failed",
- loc->gfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
goto out;
}
gf_uuid_unparse(parent.gfid, pgfid);
@@ -1322,10 +1306,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (inodelk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
goto out;
}
@@ -1334,10 +1318,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (inodelk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "inodelk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
inodelk->lk_count = count;
@@ -1346,10 +1330,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (entrylk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1359,10 +1343,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (entrylk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1376,11 +1360,11 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
dht_blocking_entrylk_after_inodelk);
if (ret < 0) {
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_INODELK_ERROR,
- "%s (%s/%s) (path: %s): "
- "dht_blocking_inodelk failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 33f9832..e3c4471 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -30,10 +30,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index afc7460..601f8da 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -79,6 +77,310 @@ GLFS_MSGID(
DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
- DHT_MSG_NON_HASHED_SUBVOL_DOWN);
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 0d68ed5..8ba8082 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -16,8 +16,8 @@
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
@@ -45,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
if (meta) {
for (i = 0; i < local_subvols_cnt; i++) {
- gf_dirent_free(&meta->equeue[i]);
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
}
GF_FREE(meta->equeue);
@@ -53,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
GF_FREE(meta->iterator);
GF_FREE(meta->offset_var);
GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
GF_FREE(meta);
}
}
@@ -84,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
return;
}
-static gf_boolean_t
-dht_is_tier_command(int cmd)
-{
- gf_boolean_t is_tier = _gf_false;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STATUS_TIER:
- case GF_DEFRAG_CMD_START_DETACH_TIER:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_PAUSE_TIER:
- case GF_DEFRAG_CMD_RESUME_TIER:
- is_tier = _gf_true;
- break;
- default:
- break;
- }
- return is_tier;
-}
-
static int
dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
{
@@ -112,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
char *tmpstr = NULL;
char *ptr = NULL;
char *suffix = "-dht";
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
int len = 0;
eventtypes_t event = EVENT_LAST;
@@ -132,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
break;
}
- if (dht_is_tier_command(cmd)) {
- /* We should have the tier volume name*/
- conf = this->private;
- defrag = conf->defrag;
- volname = defrag->tier_conf.volname;
- } else {
- /* DHT volume */
- len = strlen(this->name) - strlen(suffix);
- tmpstr = gf_strdup(this->name);
- if (tmpstr) {
- ptr = tmpstr + len;
- if (!strcmp(ptr, suffix)) {
- tmpstr[len] = '\0';
- volname = tmpstr;
- }
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
}
@@ -172,75 +147,6 @@ dht_strip_out_acls(dict_t *dict)
}
}
-static int
-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref,
- int *fop_errno)
-{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
-
- if (write_needed) {
- ret = syncop_write(
- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
- (offset + tmp_offset), iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
-
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write(to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset), (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
- }
-
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
- }
-
- ret = size;
-out:
- return ret;
-}
-
/*
return values:
-1 : failure
@@ -648,7 +554,7 @@ out:
static int
__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- int *fop_errno)
+ int *fop_errno, int file_has_holes)
{
int ret = -1;
int ret2 = -1;
@@ -703,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -817,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
/* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- if (conf->use_fallocate) {
+ if (conf->use_fallocate && !file_has_holes) {
ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
if (ret < 0) {
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
@@ -844,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
}
- }
-
- if (!conf->use_fallocate) {
+ } else {
ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
NULL);
if (ret < 0) {
@@ -1097,22 +998,90 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ size_t data_block_size = 0;
dict_t *xdata = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
+
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment
+ * starting at the offset of the last read and written byte, */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
+ }
+
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
+ break;
+ }
+
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
+ }
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
+
if (!ret || (ret < 0)) {
if (!ret) {
/* File was probably truncated*/
@@ -1124,57 +1093,42 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
- if (hole_exists) {
- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
- iobref, fop_errno);
- } else {
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
+ if (!xdata) {
+ xdata = dict_new();
if (!xdata) {
- xdata = dict_new();
- if (!xdata) {
- gf_msg("dht", GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "insufficient memory");
- ret = -1;
- *fop_errno = ENOMEM;
- break;
- }
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
- /* Fail this write and abort rebalance if we
- * detect a write from client since migration of
- * this file started. This is done to avoid
- * potential data corruption due to out of order
- * writes from rebalance and client to the same
- * region (as compared between src and dst
- * files). See
- * https://github.com/gluster/glusterfs/issues/308
- * for more details.
- */
- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
- if (ret) {
- gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
- "failed to set dict");
- ret = -1;
- *fop_errno = ENOMEM;
- break;
- }
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
}
}
- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
- NULL, xdata, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- }
- }
-
- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused");
- ret = -1;
}
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
+ *fop_errno = -ret;
break;
}
@@ -1568,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1578,21 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client using the trusted.distribute.migrate-data virtual xattr
- */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -1739,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
- fop_errno);
+ fop_errno, file_has_holes);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"Create dst failed"
@@ -1785,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* destination. We need to do update this only post migration
* as in case of failure the linkto needs to point to the source
* subvol */
- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
- &dst_fd, fop_errno);
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Create dst failed"
@@ -1873,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = 0;
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes,
@@ -1890,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2333,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2353,11 +2300,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
@@ -2587,10 +2538,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2612,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2630,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
@@ -2684,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2738,11 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
+
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2763,12 +2732,6 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
-
iatt_ptr = &iatt;
hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
@@ -2911,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3091,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
- int ret = -1;
+ int ret = 0;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
struct dht_container *tmp_container = NULL;
@@ -3109,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
}
if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]), xattr_req, NULL);
@@ -3268,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data, int *perrno)
{
int ret = -1;
- fd_t *fd = NULL;
dht_conf_t *conf = NULL;
gf_dirent_t entries;
dict_t *xattr_req = NULL;
@@ -3289,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3302,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- fd = fd_create(loc->inode, defrag->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
ret = -1;
goto out;
}
- ret = syncop_opendir(this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s", loc->path);
- *perrno = -ret;
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
ret = -1;
+ *perrno = ENOMEM;
goto out;
}
- fd_bind(fd);
- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
- if (!dir_dfmeta) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
- ret = -1;
- goto out;
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
}
dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
@@ -3358,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
}
+
ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -3370,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
gf_common_mt_int);
if (!dir_dfmeta->fetch_entries) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
ret = -1;
goto out;
}
@@ -3440,8 +3435,13 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
- defrag, fd, migrate_data, dir_dfmeta,
- xattr_req, &should_commit_hash, perrno);
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
if (ret) {
gf_log(this->name, GF_LOG_WARNING,
@@ -3481,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3509,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3523,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3569,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3696,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3799,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3825,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3889,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3902,7 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
migrate_data);
- if (ret && ret != 2) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
+
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3933,6 +3808,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
*/
ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
if (ret) {
if (-ret == ENOENT || -ret == ESTALE) {
gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
@@ -3943,6 +3829,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
defrag->total_failures++;
}
ret = 0;
+ goto out;
} else {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
"Setxattr failed for %s", loc->path);
@@ -3957,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -3977,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -4012,245 +3893,34 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- return ret;
-}
-
-/******************************************************************************
- * Tier background Fix layout functions
- ******************************************************************************/
-/* This is the background tier fixlayout thread */
-void *
-gf_tier_do_fix_layout(void *args)
-{
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
- int ret = -1;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
- struct iatt parent = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out);
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out);
- this = tier_fix_layout_arg->this;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out);
-
- GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out);
-
- /* Get Root loc_t */
- dht_build_root_loc(defrag->root_inode, &loc);
- ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
- "Lookup on root failed.");
- ret = -1;
- goto out;
- }
-
- /* Start the crawl */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering Fixlayout started");
-
- ret = gf_defrag_fix_layout(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout, NULL);
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Tiering fixlayout failed.");
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Failed to set dictionary value: key = %s",
- GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- ret = -1;
- goto out;
- }
-
- /* Marking the completion of tiering fix layout via a xattr on root */
- ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to set tiering fix "
- "layout completed xattr on %s",
- loc.path);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if (ret && defrag)
- defrag->total_failures++;
-
- if (dict)
- dict_unref(dict);
-
- return NULL;
-}
-
-int
-gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag,
- dict_t *fix_layout)
-{
- int ret = -1;
- dict_t *tier_dict = NULL;
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
-
- tier_dict = dict_new();
- if (!tier_dict) {
- gf_log("tier", GF_LOG_ERROR,
- "Tier fix layout failed :"
- "Creation of tier_dict failed");
- ret = -1;
- goto out;
- }
-
- /* Check if layout is fixed already */
- ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret != 0) {
- tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
-
- /*Fill crawl arguments */
- tier_fix_layout_arg->this = this;
- tier_fix_layout_arg->fix_layout = fix_layout;
-
- /* Spawn the fix layout thread so that its done in the
- * background */
- ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL,
- gf_tier_do_fix_layout, tier_fix_layout_arg,
- "tierfixl");
- if (ret) {
- gf_log("tier", GF_LOG_ERROR,
- "Thread creation failed. "
- "Background fix layout for tiering will not "
- "work.");
- defrag->total_failures++;
- goto out;
- }
- }
- ret = 0;
-out:
- if (tier_dict)
- dict_unref(tier_dict);
-
return ret;
}
-void
-gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- /* Check if background fixlayout is completed. This is not
- * multi-process safe i.e there is a possibility that by the time
- * we move to remove the xattr there it might have been cleared by some
- * other detach process from other node. We ignore the error if such
- * a thing happens */
- ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret) {
- /* Background fixlayout not complete - nothing to clear*/
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to retrieve fixlayout xattr."
- "Assume background fix layout not complete");
- goto out;
- }
-
- ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Failed removing tier fix layout "
- "xattr from %s",
- loc->path);
- goto out;
- }
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-}
-
-void
-gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag)
-{
- if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
- pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
- }
-}
-/******************Tier background Fix layout functions END********************/
-
int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4341,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4353,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4428,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4485,7 +4151,7 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
(void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
@@ -4559,7 +4225,6 @@ gf_defrag_start_crawl(void *data)
dict_t *migrate_data = NULL;
dict_t *status = NULL;
glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int ret = -1;
@@ -4575,7 +4240,6 @@ gf_defrag_start_crawl(void *data)
int thread_index = 0;
pthread_t *tid = NULL;
pthread_t filecnt_thread;
- gf_boolean_t is_tier_detach = _gf_false;
gf_boolean_t fc_thread_started = _gf_false;
this = data;
@@ -4594,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
@@ -4728,43 +4393,17 @@ gf_defrag_start_crawl(void *data)
}
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Fix layout for attach tier */
- ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout);
- if (ret) {
- goto out;
- }
-
- methods = &(conf->methods);
-
- /* Calling tier_start of tier.c */
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START)
- is_tier_detach = _gf_true;
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -4778,19 +4417,6 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Wait for the tier fixlayout to
- * complete if its was started.*/
- gf_tier_wait_fix_lookup(defrag);
- }
-
- if (is_tier_detach && ret == 0) {
- /* If it was a detach remove the tier fix-layout
- * xattr on root. Ignoring the failure, as nothing has to be
- * done, logging is done in gf_tier_clear_fix_layout */
- gf_tier_clear_fix_layout(this, &loc, defrag);
- }
-
gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
@@ -4889,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -4899,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
@@ -4950,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -4973,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
- gettimeofday(&end, NULL);
-
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -4998,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
@@ -5071,159 +4675,6 @@ out:
return 0;
}
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state)
-{
- pthread_mutex_lock(&tier_conf->pause_mutex);
- tier_conf->pause_state = state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-}
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf)
-{
- int state;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- state = tier_conf->pause_state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf)
-{
- int woke = 0;
- int state = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
-
- if (tier_conf->pause_state == TIER_RUNNING)
- goto out;
-
- if (tier_conf->pause_state == TIER_PAUSED)
- goto out;
-
- if (tier_conf->promote_in_progress || tier_conf->demote_in_progress)
- goto out;
-
- tier_conf->pause_state = TIER_PAUSED;
-
- if (tier_conf->pause_synctask) {
- synctask_wake(tier_conf->pause_synctask);
- tier_conf->pause_synctask = 0;
- woke = 1;
- }
-
- gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke);
-
- gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname);
-out:
- state = tier_conf->pause_state;
-
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-void
-gf_defrag_pause_tier_timeout(void *data)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- this = (xlator_t *)data;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause timer timeout");
-
- gf_defrag_check_pause_tier(&defrag->tier_conf);
-
-out:
- return;
-}
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- int ret = 0;
- struct timespec delta = {
- 0,
- };
- int delay = 2;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
- goto out;
-
- /*
- * Set flag requesting to pause tiering. Wait 'delay' seconds for
- * tiering to actually stop as indicated by the pause state
- * before returning success or failure.
- */
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- /*
- * If migration is not underway, can pause immediately.
- */
- gf_defrag_check_pause_tier(&defrag->tier_conf);
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause tier");
-
- defrag->tier_conf.pause_synctask = synctask_get();
- delta.tv_sec = delay;
- delta.tv_nsec = 0;
- defrag->tier_conf.pause_timer = gf_timer_call_after(
- this->ctx, delta, gf_defrag_pause_tier_timeout, this);
-
- synctask_yield(defrag->tier_conf.pause_synctask);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = -1;
-out:
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Pause tiering ret=%d", ret);
-
- return ret;
-}
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME,
- "Pause end. Resume tiering");
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname);
-
- return 0;
-}
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag)
-{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
-
- return 0;
-}
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index e7d8028..3e24065 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,9 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "dht-common.h"
#include "dht-lock.h"
-#include <glusterfs/glusterfs-acl.h>
#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
do { \
@@ -35,7 +33,7 @@
} \
} while (0)
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal);
@@ -146,8 +144,8 @@ dht_refresh_layout_done(call_frame_t *frame)
ret = dht_layout_sort(refreshed);
if (ret == -1) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sorting the layout failed");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
goto err;
}
@@ -203,10 +201,9 @@ dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
local->op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup of %s on %s returned error, gfid: %s",
- local->loc.path, prev->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
goto unlock;
}
@@ -267,9 +264,8 @@ dht_refresh_layout(call_frame_t *frame)
conf->subvolume_cnt);
if (!local->selfheal.refreshed_layout) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation for layout failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -281,9 +277,8 @@ dht_refresh_layout(call_frame_t *frame)
gf_uuid_unparse(local->loc.gfid, gfid);
local->xattr_req = dict_new();
if (local->xattr_req == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "dict mem allocation failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -291,9 +286,9 @@ dht_refresh_layout(call_frame_t *frame)
if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->xattr_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
}
for (i = 0; i < call_cnt; i++) {
@@ -526,7 +521,7 @@ out:
return fixit;
}
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal)
@@ -558,10 +553,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path: %s",
- gfid, local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -571,10 +564,9 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
if (lk_array[i] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation "
- "failed for lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -583,10 +575,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -595,10 +585,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
NULL, FAIL_ON_ANY_ERROR);
if (lk_array[0] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -624,7 +612,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -646,10 +634,9 @@ dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
err = 0;
} else {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "layout setxattr failed on %s, path:%s gfid:%s", subvol->name,
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
err = op_errno;
}
@@ -696,7 +683,7 @@ dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
return ret;
}
-int
+static int
dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout, int i,
xlator_t *req_subvol)
@@ -738,19 +725,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, DHT_IATT_IN_XDATA_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
@@ -758,21 +743,21 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- " %s: (subvol %s) Failed to extract disk layout,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr dictionary,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
goto err;
}
disk_layout = NULL;
@@ -788,20 +773,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
}
}
data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_OBJECTS_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
}
}
}
@@ -830,7 +812,7 @@ err:
return 0;
}
-int
+static int
dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -879,7 +861,7 @@ out:
return 0;
}
-int
+static int
dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -939,9 +921,8 @@ dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
dummy = dht_layout_new(this, 1);
if (!dummy) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dummy layout, path:%s gfid:%s", loc->path,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
@@ -957,38 +938,6 @@ out:
return 0;
}
-gf_boolean_t
-dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = 0;
- gf_boolean_t ret = _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
- ret = _gf_true;
- break;
- }
- }
-
- return ret;
-}
-
-int
-dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = -1;
- int j = 0;
-
- for (j = 0; j < layout->cnt; j++) {
- if (!strcmp(layout->list[j].xlator->name, xlator->name)) {
- i = j;
- break;
- }
- }
-
- return i;
-}
-
int
dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
@@ -1079,7 +1028,7 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -1109,11 +1058,10 @@ dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name,
- ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), op_errno,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: path = %s, gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
dht_iatt_merge(this, &local->preparent, preparent);
@@ -1132,89 +1080,7 @@ out:
return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict)
-{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(xattr);
- GF_ASSERT(dict);
-
- this = THIS;
- GF_ASSERT(this);
-
- acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR);
-
- if (!acl_default) {
- gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present");
- goto cont;
- }
- ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_DEFAULT_XATTR);
-cont:
- acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present");
- goto out;
- }
- ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_ACCESS_XATTR);
-
-out:
- return;
-}
-
-void
-dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst)
-{
- data_t *quota_limit_key = NULL;
- data_t *quota_limit_obj_key = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(src);
- GF_ASSERT(dst);
-
- this = THIS;
- GF_ASSERT(this);
-
- quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY);
- if (!quota_limit_key) {
- gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present");
- goto cont;
- }
- ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY);
-
-cont:
- quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY);
- if (!quota_limit_obj_key) {
- gf_msg_debug(this->name, 0,
- "QUOTA_LIMIT_OBJECTS_KEY xattr not present");
- goto out;
- }
- ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- QUOTA_LIMIT_OBJECTS_KEY);
-
-out:
- return;
-}
-
-int
+static int
dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -1238,10 +1104,8 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
@@ -1253,18 +1117,15 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
if (!dict) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "dict is NULL, need to make sure gfids are same");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
dict = dict_new();
if (!dict)
return -1;
}
ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value for"
- " key = %s at path: %s",
- GF_INTERNAL_CTX_KEY, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
/* We can still continue. As heal can still happen
* unless quota limits have reached for the dir.
*/
@@ -1296,7 +1157,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -1390,7 +1251,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -1410,19 +1271,14 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
return 0;
}
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
- "acquiring entrylk after inodelk failed for %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
local->op_errno = op_errno;
goto err;
@@ -1436,10 +1292,8 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary key list-xattr value "
- " for path %s ",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
for (i = 0; i < conf->subvolume_cnt; i++) {
if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
@@ -1462,18 +1316,21 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
int force)
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1490,13 +1347,12 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "%s:xattr heal failed for "
- "directory (gfid = %s)",
- local->loc.path, local->gfid);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
@@ -1505,28 +1361,53 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!ret)
return 0;
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "%s: Failed to set mds xattr "
- "for directory (gfid = %s)",
- local->loc.path, local->gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
}
}
dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "(%s/%s) (path: %s): "
- "hashed subvolume not found",
- loc->pargfid, loc->name, loc->path);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
@@ -1542,7 +1423,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
dht_layout_t *layout)
{
@@ -1651,8 +1532,6 @@ dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
void
-dht_layout_entry_swap(dht_layout_t *layout, int i, int j);
-void
dht_layout_range_swap(dht_layout_t *layout, int i, int j);
/*
@@ -1661,7 +1540,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j);
*/
#define OV_ENTRY(x, y) table[x * new->cnt + y]
-void
+static void
dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
dht_layout_t *new, dht_layout_t *old)
{
@@ -1738,7 +1617,7 @@ dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
}
}
-dht_layout_t *
+static dht_layout_t *
dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
@@ -1763,9 +1642,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
new_layout = dht_layout_new(this, priv->subvolume_cnt);
if (!new_layout) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for new_layout, path:%s gfid:%s",
- loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
goto done;
}
@@ -1775,10 +1653,9 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (subvol_down) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Layout fix failed: %u subvolume(s) are down"
- ". Skipping fix layout. path:%s gfid:%s",
- subvol_down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
GF_FREE(new_layout);
return NULL;
}
@@ -1796,10 +1673,10 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (priv->du_stats) {
for (i = 0; i < priv->subvolume_cnt; ++i) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
- "subvolume %d (%s): %u chunks, path:%s", i,
- priv->subvolumes[i]->name, priv->du_stats[i].chunks,
- loc->path);
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
/* Maximize overlap if the bricks are all the same
* size.
@@ -1811,8 +1688,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
}
}
} else {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
- "no du stats ?!?");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
}
/* First give it a layout as though it is a new directory. This
@@ -1843,7 +1720,7 @@ done:
* Having to call this 2x for each entry in the layout is pretty horrible, but
* that's what all of this layout-sorting nonsense gets us.
*/
-uint32_t
+static uint32_t
dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
{
dht_conf_t *priv = parent->private;
@@ -1961,7 +1838,7 @@ done:
return;
}
-int
+static int
dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -2020,9 +1897,8 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
ret = -1;
goto out;
}
@@ -2094,9 +1970,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.dir_cbk = dir_cbk;
local->selfheal.layout = dht_layout_ref(this, layout);
- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) {
- /*Use the one in the mds_stbuf*/
- local->stbuf = local->mds_stbuf;
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
}
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
@@ -2106,9 +1991,9 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
ret = 0;
goto sorry_no_fix;
}
@@ -2134,19 +2019,17 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
misc = local->selfheal.misc;
if (down) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "%s: Directory selfheal failed: %d subvolumes down."
- "Not fixing. gfid = %s",
- loc->path, down, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "%s: Directory selfheal failed : %d subvolumes "
- "have unrecoverable errors. gfid = %s",
- loc->path, misc, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
@@ -2232,29 +2115,28 @@ dht_dir_heal_xattrs(void *data)
gf_uuid_unparse(local->loc.gfid, gfid);
if (!mds_subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
gf_uuid_is_null(local->loc.gfid)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No gfid present so skip heal for path %s gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
internal_xattr = dict_new();
if (!internal_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
@@ -2262,18 +2144,17 @@ dht_dir_heal_xattrs(void *data)
user_xattr = dict_new();
if (!user_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "failed to list xattrs for "
- "%s: on %s ",
- local->loc.path, local->mds_subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
}
if (!mds_xattr)
@@ -2288,10 +2169,9 @@ dht_dir_heal_xattrs(void *data)
dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s,"
- " path = %s",
- GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
goto out;
}
}
@@ -2303,16 +2183,25 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
xattr_hashed = 1;
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Directory xattr heal failed. Failed to set"
- "user xattr on path %s on "
- "subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
@@ -2321,21 +2210,17 @@ dht_dir_heal_xattrs(void *data)
ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
1);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->mds_xattr_key, local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
goto out;
}
ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to reset internal xattr "
- "on path %s on subvol %s"
- "gfid = %s ",
- local->loc.path, mds_subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
}
}
@@ -2386,8 +2271,8 @@ dht_dir_attr_heal(void *data)
call_cnt = conf->subvolume_cnt;
if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -2395,11 +2280,9 @@ dht_dir_attr_heal(void *data)
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->subvolumes[i] == mds_subvol) {
if (!conf->subvolume_status[i]) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_DOWN,
- "mds subvol is down for path "
- " %s gfid is %s Unable to set xattr ",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -2425,10 +2308,9 @@ dht_dir_attr_heal(void *data)
if (ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Directory attr heal failed. Failed to set"
- " uid/gid on path %s on subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
}
}
out:
@@ -2443,7 +2325,7 @@ dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
}
/* EXIT: dht_update_commit_hash_for_layout */
-int
+static int
dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2463,7 +2345,7 @@ dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -2481,11 +2363,8 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
local->op_ret = -1;
}
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Winding unlock failed: stale locks left on brick"
- " %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
}
@@ -2493,7 +2372,7 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xdata)
@@ -2520,7 +2399,7 @@ dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2548,11 +2427,8 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
goto err;
}
@@ -2563,11 +2439,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_errno = ENOENT;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to find disk layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
goto err;
}
@@ -2581,12 +2456,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret == -1) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to extract disk"
- " layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
goto err;
}
@@ -2595,11 +2468,9 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr[i]) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
goto err;
}
@@ -2608,12 +2479,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret != 0) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr"
- " dictionary,",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 0b73121..bb72b0f 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -17,23 +17,6 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \
- { \
- throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
- pthread_mutex_lock(&conf->defrag->dfq_mutex); \
- \
- if (!strcasecmp(conf->dthrottle, "lazy")) \
- conf->defrag->recon_thread_count = 1; \
- \
- else if (!strcasecmp(conf->dthrottle, "normal")) \
- conf->defrag->recon_thread_count = (throttle_count / 2); \
- \
- else if (!strcasecmp(conf->dthrottle, "aggressive")) \
- conf->defrag->recon_thread_count = throttle_count; \
- \
- pthread_mutex_unlock(&conf->defrag->dfq_mutex); \
- }
-
/* TODO:
- use volumename in xattr instead of "dht"
- use NS locks
@@ -41,9 +24,7 @@
- complete linkfile selfheal
*/
-extern dht_methods_t dht_methods;
-
-void
+static void
dht_layout_dump(dht_layout_t *layout, const char *prefix)
{
char key[GF_DUMP_MAX_BUF_LEN];
@@ -51,8 +32,6 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
if (!layout)
goto out;
- if (!prefix)
- goto out;
gf_proc_dump_build_key(key, prefix, "cnt");
gf_proc_dump_write(key, "%d", layout->cnt);
@@ -161,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -263,7 +242,7 @@ out:
return ret;
}
-int
+static int
dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
const char *bricks)
{
@@ -309,14 +288,10 @@ out:
return ret;
}
-int
+static void
dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
{
int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->decommissioned_bricks[i]) {
@@ -324,13 +299,9 @@ dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
conf->decommission_subvols_cnt--;
}
}
-
- ret = 0;
-out:
-
- return ret;
}
-void
+
+static void
dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
gf_boolean_t *re_valid, dht_conf_t *conf)
{
@@ -387,7 +358,7 @@ out:
return ret;
}
-int
+static int
dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
{
int rebal_thread_count = 0;
@@ -526,9 +497,7 @@ dht_reconfigure(xlator_t *this, dict_t *options)
if (ret == -1)
goto out;
} else {
- ret = dht_decommissioned_remove(this, conf);
- if (ret == -1)
- goto out;
+ dht_decommissioned_remove(this, conf);
}
dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
@@ -568,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -614,7 +585,7 @@ out:
return ret;
}
-int
+static int
dht_init_methods(xlator_t *this)
{
int ret = -1;
@@ -627,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -1076,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 5931363..3648a56 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,7 +595,6 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index dedcfb0..703a30e 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
"links: %u-%u, uid: %u-%u, gid: %u-%u, "
"rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
", "
- "mode: %o-%o)",
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
@@ -342,9 +343,8 @@ out:
}
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -729,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -766,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 9045a33..b955efd 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s, %s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes),
- ec_msg_str(fop));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
static int32_t
ec_child_select(ec_fop_data_t *fop)
{
@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
(fop->locks[0].update[EC_DATA_TXN] ||
fop->locks[0].update[EC_METADATA_TXN])) {
if (ec->quorum_count && (num < ec->quorum_count)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, ec->quorum_count, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
return 0;
}
}
@@ -2240,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index ef6b06f..f71dcfa 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -386,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 81f6add..7d991f0 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -70,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -994,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -1172,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1416,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1493,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1517,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1536,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1543,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1588,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1609,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1640,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1961,14 +1976,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
- fop->error, 0, 0, 0, NULL);
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -2005,14 +2020,15 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, heal, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
ec_heal_t *heal = cookie;
@@ -2481,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2498,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2533,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2551,23 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- if (ec->shd.iamshd) {
+ if (ec->shd.iamshd && (ret <= 0)) {
ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
&need_heal);
- if (need_heal == EC_HEAL_NONEED) {
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
"Heal is not required for : %s ", uuid_utoa(loc->gfid));
goto out;
}
}
+
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2597,10 +2677,11 @@ out:
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2648,7 +2729,7 @@ ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
- 0, NULL);
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2835,7 +2916,7 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, data, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
@@ -2964,6 +3045,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 956e73c..5c1586b 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -62,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + ec->shd.timeout;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -156,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index a891ccd..dad5f4d 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -390,7 +390,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
@@ -398,6 +399,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -431,11 +451,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 8e84977..601960d 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -24,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
ec_t *ec = fop->xl->private;
ec_cbk_data_t *ans = NULL;
ec_cbk_data_t *cbk = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in an special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ans->op_ret >= 0) {
@@ -34,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
error = EIO;
}
locked |= ans->mask;
+ good = ans->count;
cbk = ans;
- } else {
- if (ans->op_errno == EAGAIN) {
- switch (fop->uint32) {
- case EC_LOCK_MODE_NONE:
- case EC_LOCK_MODE_ALL:
- /* Goal is to treat non-blocking lock as failure
- * even if there is a single EAGAIN*/
- notlocked |= ans->mask;
- break;
- }
- }
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
if (error == -1) {
- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
- if (notlocked == 0) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;
}
@@ -64,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
case EC_LOCK_MODE_NONE:
error = EAGAIN;
break;
-
case EC_LOCK_MODE_ALL:
fop->uint32 = EC_LOCK_MODE_INC;
break;
-
default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
error = EIO;
break;
}
}
} else {
- if (fop->answer && fop->answer->op_ret < 0)
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
error = fop->answer->op_errno;
- else
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
error = EIO;
+ }
}
}
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 7829b8c..de9b89b 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
@@ -186,10 +191,10 @@ struct _ec_inode {
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
union _ec_cbk {
fop_access_cbk_t access;
@@ -621,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
};
struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 66b4e63..7344be4 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -700,6 +710,8 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
}
static int
@@ -1569,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9..6f6de6d 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2