summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorBarak Sason Rofman <bsasonro@redhat.com>2019-12-01 12:26:53 +0200
committerRavishankar N <ravishankar@redhat.com>2019-12-10 05:44:28 +0000
commitf8f4efef77c177900923f5b29ddb707fb3f07720 (patch)
tree7f81b2fc7cf5fe17c4addc62b154dce7a620a447 /xlators/cluster
parentba407a67fc0895c970c327361110a960af254401 (diff)
afr/self-heald - Missing error logs
As a follow up on https://review.gluster.org/#/c/glusterfs/+/23749/, adding error logging for the entire method. In addition, converted logging to structured logging in the method. Fixes: bz#1778457 Change-Id: I1f412159e6849d6f6ddbde53ec4a85ad709bbdf4 Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-messages.h9
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c92
2 files changed, 77 insertions, 24 deletions
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 8e59c51b993..cfdf88f2140 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -42,6 +42,13 @@ GLFS_MSGID(AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET,
AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS,
AFR_MSG_NO_CHANGELOG, AFR_MSG_TIMER_CREATE_FAIL,
AFR_MSG_SBRAIN_FAV_CHILD_POLICY, AFR_MSG_INODE_CTX_GET_FAILED,
- AFR_MSG_THIN_ARB, AFR_MSG_LK_HEAL_DOM);
+ AFR_MSG_THIN_ARB, AFR_MSG_LK_HEAL_DOM, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED);
+
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index b067c1bf1b5..7066c01971b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1374,19 +1374,40 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
int op_ret = 0;
uint64_t cnt = 0;
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
priv = this->private;
shd = &priv->shd;
ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
goto out;
+ }
this_name_len = strlen(this->name);
ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
switch (op) {
case GF_SHD_OP_HEAL_INDEX:
op_ret = 0;
@@ -1396,23 +1417,30 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
op_ret = -1;
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SLESS_THAN2_BRICKS_in_REP,
SLEN(SLESS_THAN2_BRICKS_in_REP));
op_ret = -1;
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_nstrn(output, key, keylen, SBRICK_IS_REMOTE,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SSTARTED_SELF_HEAL,
SLEN(SSTARTED_SELF_HEAL));
- afr_shd_index_healer_spawn(this, i);
+
+ ret = afr_shd_index_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
}
}
break;
@@ -1424,21 +1452,28 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SLESS_THAN2_BRICKS_in_REP,
SLEN(SLESS_THAN2_BRICKS_in_REP));
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_nstrn(output, key, keylen, SBRICK_IS_REMOTE,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SSTARTED_SELF_HEAL,
SLEN(SSTARTED_SELF_HEAL));
- afr_shd_full_healer_spawn(this, i);
+
+ ret = afr_shd_full_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1450,7 +1485,8 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_HEAL_FAILED_FILES:
for (i = 0; i < priv->child_count; i++) {
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_nstrn(output, key, keylen, SOP_NOT_SUPPORTED,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SOP_NOT_SUPPORTED,
SLEN(SOP_NOT_SUPPORTED));
}
break;
@@ -1460,10 +1496,19 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_STATISTICS:
for (i = 0; i < priv->child_count; i++) {
eh_dump(shd->statistics[i], output, afr_add_crawl_event);
- afr_shd_dict_add_crawl_event(
+ ret = afr_shd_dict_add_crawl_event(
this, output, &shd->index_healers[i].crawl_event);
- afr_shd_dict_add_crawl_event(this, output,
- &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
}
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
@@ -1474,7 +1519,7 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
if (!priv->child_up[i]) {
keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
i);
- ret = dict_set_nstrn(output, key, keylen,
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
SBRICK_NOT_CONNECTED,
SLEN(SBRICK_NOT_CONNECTED));
} else {
@@ -1484,9 +1529,8 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
ret = dict_set_uint64(output, key, cnt);
}
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- AFR_MSG_DICT_SET_FAILED,
- "Could not add cnt to output");
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
}
op_ret = 0;
}
@@ -1495,11 +1539,13 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG,
- "Unknown set op %d", op);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
break;
}
out:
dict_deln(output, this->name, this_name_len);
return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
}