summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-common.c93
-rw-r--r--xlators/cluster/afr/src/afr-messages.h3
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c107
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h5
-rw-r--r--xlators/cluster/afr/src/afr.c25
-rw-r--r--xlators/cluster/afr/src/afr.h4
6 files changed, 229 insertions, 8 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 01a5db54bdd..8752e98c8df 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2901,10 +2901,8 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
-
static void
-afr_discover_done (call_frame_t *frame, xlator_t *this)
+afr_discover_unwind (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -2966,6 +2964,84 @@ unwind:
&local->replies[read_subvol].postparent);
}
+/*
+ * Synctask body that makes sure the thin-arbiter id file exists and caches
+ * its gfid in priv->ta_gfid.
+ *
+ * @opaque: the AFR xlator (xlator_t *) handed to synctask_new().
+ *
+ * The id file is looked up on the thin-arbiter child. If the lookup fails
+ * with ENOENT the file is created with a newly generated gfid passed as
+ * "gfid-req". Any failure is logged.
+ *
+ * Always returns 0; errors are reported through the log only.
+ */
+static int
+afr_ta_id_file_check (void *opaque)
+{
+        afr_private_t *priv  = NULL;
+        xlator_t      *this  = NULL;
+        loc_t          loc   = {0, };
+        struct iatt    stbuf = {0,};
+        dict_t        *dict  = NULL;
+        uuid_t         gfid  = {0,};
+        fd_t          *fd    = NULL;
+        int            ret   = 0;
+
+        this = opaque;
+        priv = this->private;
+
+        ret = afr_fill_ta_loc (this, &loc);
+        if (ret)
+                goto out;
+
+        ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+                             &stbuf, 0, 0, 0);
+        /* 0: file already exists; any error other than ENOENT is final. */
+        if (ret != -ENOENT)
+                goto out;
+
+        fd = fd_create (loc.inode, getpid());
+        if (!fd) {
+                /* Do not report the allocation failure as ENOENT. */
+                ret = -ENOMEM;
+                goto out;
+        }
+        dict = dict_new ();
+        if (!dict) {
+                ret = -ENOMEM;
+                goto out;
+        }
+        gf_uuid_generate (gfid);
+        ret = dict_set_gfuuid (dict, "gfid-req", gfid, true);
+        if (ret)
+                /* Without gfid-req the create must not be attempted. */
+                goto out;
+        ret = syncop_create (priv->children[THIN_ARBITER_BRICK_INDEX],
+                             &loc, O_RDWR, 0664, fd, &stbuf, dict,
+                             NULL);
+
+out:
+        if (ret == 0) {
+                gf_uuid_copy (priv->ta_gfid, stbuf.ia_gfid);
+        } else {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to lookup/create thin-arbiter id file.");
+        }
+        if (dict)
+                dict_unref (dict);
+        if (fd)
+                fd_unref (fd);
+        loc_wipe (&loc);
+
+        return 0;
+}
+
+/*
+ * Callback passed to synctask_new() alongside afr_ta_id_file_check().
+ * It ignores all of its arguments and returns 0.
+ */
+static int
+afr_ta_id_file_check_cbk (int ret, call_frame_t *ta_frame, void *opaque)
+{
+ return 0;
+}
+
+/*
+ * Finish a discover operation.
+ *
+ * When a thin arbiter is configured and its id-file gfid has not been cached
+ * in priv->ta_gfid yet, afr_ta_id_file_check() is launched through
+ * synctask_new(). afr_discover_unwind() is then called regardless of whether
+ * the task could be created; a creation failure is only logged.
+ */
+static void
+afr_discover_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv = this->private;
+        int            ret  = 0;
+
+        if (priv->thin_arbiter_count && gf_uuid_is_null (priv->ta_gfid)) {
+                ret = synctask_new (this->ctx->env, afr_ta_id_file_check,
+                                    afr_ta_id_file_check_cbk, NULL, this);
+                if (ret) {
+                        /* Discovery still completes; make the failure
+                         * visible instead of dropping it silently. */
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                AFR_MSG_THIN_ARB,
+                                "Failed to create synctask to check for "
+                                "thin-arbiter id file.");
+                }
+        }
+
+        afr_discover_unwind (frame, this);
+}
int
afr_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -5514,15 +5590,22 @@ afr_set_low_priority (call_frame_t *frame)
void
afr_priv_destroy (afr_private_t *priv)
{
- int i = 0;
+ int i = 0;
+ int child_count = -1;
if (!priv)
goto out;
GF_FREE (priv->last_event);
+
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
if (priv->pending_key) {
- for (i = 0; i < priv->child_count; i++)
+ for (i = 0; i < child_count; i++)
GF_FREE (priv->pending_key[i]);
}
+
GF_FREE (priv->pending_reads);
GF_FREE (priv->local);
GF_FREE (priv->pending_key);
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 743bd945821..8aa94730158 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -66,7 +66,8 @@ GLFS_MSGID(AFR,
AFR_MSG_NO_CHANGELOG,
AFR_MSG_TIMER_CREATE_FAIL,
AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
- AFR_MSG_INODE_CTX_GET_FAILED
+ AFR_MSG_INODE_CTX_GET_FAILED,
+ AFR_MSG_THIN_ARB
);
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8bb096775c2..b4d3062fc2b 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -635,6 +635,14 @@ afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
+ if (priv->thin_arbiter_count) {
+ /* We need to perform post-op even if 1 data brick was down
+ * before the txn started.*/
+ if (AFR_COUNT (local->transaction.failed_subvols,
+ priv->child_count))
+ return _gf_false;
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i] &&
local->transaction.failed_subvols[i])
@@ -825,6 +833,97 @@ afr_handle_quorum (call_frame_t *frame)
}
+/*
+ * Fill @loc so that it refers to the thin-arbiter id file.
+ *
+ * The parent is a new reference on priv->root_inode, the name points at
+ * priv->pending_key[THIN_ARBITER_BRICK_INDEX] (not a copy), the gfid is
+ * priv->ta_gfid and the inode is newly allocated from the parent's table.
+ *
+ * Returns 0 on success; the caller then releases the references with
+ * loc_wipe(). Returns -ENOMEM if the inode cannot be allocated, in which
+ * case @loc has already been wiped.
+ */
int
+afr_fill_ta_loc (xlator_t *this, loc_t *loc)
+{
+        afr_private_t *priv = NULL;
+
+        priv = this->private;
+        loc->parent = inode_ref (priv->root_inode);
+        gf_uuid_copy (loc->pargfid, loc->parent->gfid);
+        loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+        gf_uuid_copy (loc->gfid, priv->ta_gfid);
+        loc->inode = inode_new (loc->parent->table);
+        if (!loc->inode) {
+                /* Drop the parent reference taken above so that callers
+                 * which bail out on error do not leak it. */
+                loc_wipe (loc);
+                return -ENOMEM;
+        }
+        return 0;
+}
+
+/*
+ * Record pending changelog xattrs on the thin-arbiter id file.
+ *
+ * @this:  the AFR xlator.
+ * @local: transaction state; failed_subvols, pending[] and inode are read.
+ *
+ * Nothing is done (0 is returned) when no thin arbiter is configured or no
+ * subvolume is marked failed for the transaction. Otherwise the pending
+ * arrays of all data children are applied to the id file with an
+ * ADD_ARRAY xattrop, performed under a write inodelk in THIN_ARBITER_DOM1.
+ *
+ * Returns 0 on success or the negative error returned by the failing step.
+ */
+int
+afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local)
+{
+        int              ret          = 0;
+        afr_private_t   *priv         = NULL;
+        dict_t          *xattr        = NULL;
+        int              failed_count = 0;
+        struct gf_flock  flock        = {0, };
+        loc_t            loc          = {0,};
+        int              i            = 0;
+
+        priv = this->private;
+        if (!priv->thin_arbiter_count)
+                return 0;
+
+        failed_count = AFR_COUNT (local->transaction.failed_subvols,
+                                  priv->child_count);
+        if (!failed_count)
+                return 0;
+
+        GF_ASSERT (failed_count == 1);
+        ret = afr_fill_ta_loc (this, &loc);
+        if (ret)
+                goto out;
+
+        xattr = dict_new ();
+        if (!xattr) {
+                ret = -ENOMEM;
+                goto out;
+        }
+        for (i = 0; i < priv->child_count; i++) {
+                ret = dict_set_static_bin (xattr, priv->pending_key[i],
+                                           local->pending[i],
+                                           AFR_NUM_CHANGE_LOGS * sizeof (int));
+                if (ret)
+                        goto out;
+        }
+
+        flock.l_type = F_WRLCK;
+        flock.l_start = 0;
+        flock.l_len = 0;
+
+        /*TODO: Convert to two domain locking. */
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock,
+                              NULL, NULL);
+        if (ret)
+                goto out;
+
+        ret = syncop_xattrop (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+                              GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+
+        if (ret == -EINVAL) {
+                gf_msg (this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
+                        "Thin-arbiter has denied post-op on %s for gfid %s.",
+                        priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+                        uuid_utoa (local->inode->gfid));
+
+        } else if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Post-op on thin-arbiter id file %s failed for gfid %s.",
+                        priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+                        uuid_utoa (local->inode->gfid));
+        }
+        /* The xattrop result in ret is what gets returned; the unlock
+         * result is deliberately not folded into it. */
+        flock.l_type = F_UNLCK;
+        syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                        THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock, NULL, NULL);
+out:
+        if (xattr)
+                dict_unref (xattr);
+        /* Release the inode references taken by afr_fill_ta_loc(). */
+        loc_wipe (&loc);
+
+        return ret;
+}
+
+int
afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = this->private;
@@ -885,6 +984,14 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
+ ret = afr_changelog_thin_arbiter_post_op (this, local);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
+
if (need_undirty)
local->dirty[idx] = hton32(-1);
else
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index cb62c185938..629f6dd557c 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -54,8 +54,13 @@ afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
inode_t *inode2, unsigned char *readable2);
int
afr_transaction_resume (call_frame_t *frame, xlator_t *this);
+
int
afr_lock (call_frame_t *frame, xlator_t *this);
+
void
afr_delayed_changelog_wake_up_cbk (void *data);
+
+int
+afr_fill_ta_loc (xlator_t *this, loc_t *loc);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index cb4b1537984..27cee590b4b 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -303,12 +303,20 @@ afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this)
char *ptr1 = NULL;
char *xattrs_list = NULL;
xlator_list_t *trav = NULL;
+ int child_count = -1;
trav = this->children;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+ * name of the thin arbiter file for persistence across add/
+ * removal of DHT subvols.*/
+ child_count++;
+ }
GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out);
priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- priv->child_count, gf_afr_mt_char);
+ child_count, gf_afr_mt_char);
if (!priv->pending_key) {
ret = -ENOMEM;
goto out;
@@ -318,7 +326,7 @@ afr_pending_xattrs_init (afr_private_t *priv, xlator_t *this)
"Unable to fetch afr-pending-xattr option from volfile."
" Falling back to using client translator names. ");
- while (i < priv->child_count) {
+ while (i < child_count) {
ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
AFR_XATTR_PREFIX,
trav->xlator->name);
@@ -368,6 +376,7 @@ init (xlator_t *this)
int read_subvol_index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ char *thin_arbiter = NULL;
if (!this->children) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -397,6 +406,11 @@ init (xlator_t *this)
priv->read_child = -1;
GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
+ GF_OPTION_INIT ("thin-arbiter", thin_arbiter, str, out);
+ if (thin_arbiter && strlen(thin_arbiter) > 0) {
+ priv->thin_arbiter_count = 1;
+ priv->child_count--;
+ }
INIT_LIST_HEAD (&priv->healing);
INIT_LIST_HEAD (&priv->heal_waiting);
@@ -1103,6 +1117,13 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_INT,
.description = "subset of child_count. Has to be 0 or 1."
},
+        /* Option that enables thin-arbiter mode for the replica. */
+        { .key = {"thin-arbiter"},
+          .type = GF_OPTION_TYPE_STR,
+          .op_version = {GD_OP_VERSION_4_1_0},
+          .flags = OPT_FLAG_SETTABLE,
+          .tags = {"replicate"},
+          .description = "contains host:path of thin arbiter brick",
+        },
{ .key = {"shd-max-threads"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index b96be62a910..fd75de45341 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -35,6 +35,8 @@
#define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
#define ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_DOM1 "afr.ta.domain-1"
#define AFR_HALO_MAX_LATENCY 99999
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
@@ -81,10 +83,12 @@ typedef struct _afr_private {
unsigned int child_count; /* total number of children */
unsigned int arbiter_count; /*subset of child_count.
Has to be 0 or 1.*/
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
xlator_t **children;
inode_t *root_inode;
+ uuid_t ta_gfid; /*For thin arbiter.*/
unsigned char *child_up;
int64_t *child_latency;