summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr.c
diff options
context:
space:
mode:
author: Ravishankar N <ravishankar@redhat.com> 2018-09-23 16:59:58 +0530
committer: Ravishankar N <ravishankar@redhat.com> 2018-10-25 12:26:22 +0000
commit: 053b1309dc8fbc05fcde5223e734da9f694cf5cc (patch)
tree: e2eba5c81024b5dc07eef5966289d5e71c3567ee /xlators/cluster/afr/src/afr.c
parent: aae1c402b74fd02ed2f6473b896f108d82aef8e3 (diff)
afr: thin-arbiter 2 domain locking and in-memory state
2 domain locking + xattrop for write-txn failures:
--------------------------------------------------
- A post-op wound on TA takes the AFR_TA_DOM_NOTIFY range lock and the AFR_TA_DOM_MODIFY full lock, does an xattrop on TA, releases the AFR_TA_DOM_MODIFY lock and stores in memory which brick is bad.
- All further write txn failures are handled based on this in-memory value without querying the TA.
- When shd heals the files, it does so by requesting a full lock on the AFR_TA_DOM_NOTIFY domain. The client uses this as a cue (via upcall), releases the AFR_TA_DOM_NOTIFY range lock and invalidates its in-memory notion of which brick is bad. The next write txn failure is wound on TA to update the in-memory state again.
- Any write txns that are still incomplete when the AFR_TA_DOM_NOTIFY upcall release request is received are completed before the lock is released.
- Any write txns received after the release request are maintained in a ta_waitq.
- After the release is complete, the ta_waitq elements are spliced to a separate queue, which is then processed one by one.
- For fops that come in parallel when the in-memory bad brick is still unknown, only one is wound to TA on the wire. The others are maintained in a ta_onwireq, which is processed after we get the response from TA.

Change-Id: I32c7b61a61776663601ab0040e2f0767eca1fd64
updates: bz#1579788
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr.c')
-rw-r--r--xlators/cluster/afr/src/afr.c22
1 files changed, 17 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 26950fd7927..5d5e536ff60 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -336,6 +336,22 @@ out:
return ret;
}
+void
+afr_ta_init(afr_private_t *priv) /* Put the thin-arbiter (TA) fields of priv into their initial state; init() calls this only when the "thin-arbiter" option is a non-empty string. */
+{
+ priv->thin_arbiter_count = 1; /* record a single thin-arbiter */
+ priv->child_count--; /* NOTE(review): presumably stops the TA subvolume being counted as a regular child - confirm */
+ priv->ta_child_up = 0; /* TA child-up state starts at 0 (not set by the code this helper replaced) */
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; /* no bad brick is known in memory yet */
+ priv->ta_notify_dom_lock_offset = 0; /* NOTE(review): presumably the offset used for the AFR_TA_DOM_NOTIFY range lock - confirm */
+ priv->ta_in_mem_txn_count = 0; /* counter starts at zero */
+ priv->ta_on_wire_txn_count = 0; /* counter starts at zero */
+ priv->release_ta_notify_dom_lock = _gf_false; /* no release of the notify-domain lock is pending at start-up */
+ INIT_LIST_HEAD(&priv->ta_waitq); /* empty queue; the commit message describes it as holding write txns received after a release request */
+ INIT_LIST_HEAD(&priv->ta_onwireq); /* empty queue; the commit message describes it as holding parallel fops until the TA response arrives */
+ *priv->ta_gfid = 0; /* zero the first element of ta_gfid; presumably marks the gfid as unset - confirm */
+}
+
int32_t
init(xlator_t *this)
{
@@ -380,11 +396,7 @@ init(xlator_t *this)
GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
if (thin_arbiter && strlen(thin_arbiter) > 0) {
- priv->thin_arbiter_count = 1;
- priv->child_count--;
- priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
- priv->ta_notify_dom_lock_offset = 0;
- *priv->ta_gfid = 0;
+ afr_ta_init(priv);
}
INIT_LIST_HEAD(&priv->healing);
INIT_LIST_HEAD(&priv->heal_waiting);