From d7a4d256bd86aadcd60668ee37079514dfcf41f3 Mon Sep 17 00:00:00 2001
From: Ravishankar N
Date: Sun, 23 Sep 2018 16:59:58 +0530
Subject: afr: thin-arbiter 2 domain locking and in-memory state

2 domain locking + xattrop for write-txn failures:
--------------------------------------------------
- A post-op wound on the TA takes the AFR_TA_DOM_NOTIFY range lock and the AFR_TA_DOM_MODIFY full lock, does an xattrop on the TA, releases the AFR_TA_DOM_MODIFY lock, and stores in memory which brick is bad.
- All further write txn failures are handled based on this in-memory value without querying the TA.
- When shd heals the files, it does so by requesting a full lock on the AFR_TA_DOM_NOTIFY domain. The client uses this as a cue (via upcall), releases its AFR_TA_DOM_NOTIFY range lock and invalidates its in-memory notion of which brick is bad. The next write txn failure is wound to the TA to update the in-memory state again.
- Any write txns that are still incomplete when the AFR_TA_DOM_NOTIFY upcall release request is received are completed before the lock is released.
- Any write txns received after the release request are held in a ta_waitq.
- After the release is complete, the ta_waitq elements are spliced to a separate queue, which is then processed one by one.
- For fops that come in parallel while the in-memory bad brick is still unknown, only one is wound to the TA on the wire. The others are held in a ta_onwireq, which is processed after the response from the TA arrives.
Change-Id: I32c7b61a61776663601ab0040e2f0767eca1fd64 updates: bz#1648205 Signed-off-by: Ravishankar N Signed-off-by: Ashish Pandey --- xlators/cluster/afr/src/afr.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'xlators/cluster/afr/src/afr.c') diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 26950fd7927..5d5e536ff60 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -336,6 +336,22 @@ out: return ret; } +void +afr_ta_init(afr_private_t *priv) +{ + priv->thin_arbiter_count = 1; + priv->child_count--; + priv->ta_child_up = 0; + priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; + priv->ta_notify_dom_lock_offset = 0; + priv->ta_in_mem_txn_count = 0; + priv->ta_on_wire_txn_count = 0; + priv->release_ta_notify_dom_lock = _gf_false; + INIT_LIST_HEAD(&priv->ta_waitq); + INIT_LIST_HEAD(&priv->ta_onwireq); + *priv->ta_gfid = 0; +} + int32_t init(xlator_t *this) { @@ -380,11 +396,7 @@ init(xlator_t *this) GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out); GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out); if (thin_arbiter && strlen(thin_arbiter) > 0) { - priv->thin_arbiter_count = 1; - priv->child_count--; - priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; - priv->ta_notify_dom_lock_offset = 0; - *priv->ta_gfid = 0; + afr_ta_init(priv); } INIT_LIST_HEAD(&priv->healing); INIT_LIST_HEAD(&priv->heal_waiting); -- cgit