From d7a4d256bd86aadcd60668ee37079514dfcf41f3 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Sun, 23 Sep 2018 16:59:58 +0530 Subject: afr: thin-arbiter 2 domain locking and in-memory state 2 domain locking + xattrop for write-txn failures: -------------------------------------------------- - A post-op wound on TA takes the AFR_TA_DOM_NOTIFY range lock and the AFR_TA_DOM_MODIFY full lock, does an xattrop on TA, releases the AFR_TA_DOM_MODIFY lock and stores in-memory which brick is bad. - All further write txn failures are handled based on this in-memory value without querying the TA. - When shd heals the files, it does so by requesting a full lock on the AFR_TA_DOM_NOTIFY domain. The client uses this as a cue (via upcall), releases the AFR_TA_DOM_NOTIFY range lock and invalidates its in-memory notion of which brick is bad. The next write txn failure is wound on TA to again update the in-memory state. - Any write txns that are still incomplete when the AFR_TA_DOM_NOTIFY upcall release request is received are completed before the lock is released. - Any write txns received after the release request are maintained in a ta_waitq. - After the release is complete, the ta_waitq elements are spliced to a separate queue which is then processed one by one. - For fops that come in parallel when the in-memory bad brick is still unknown, only one is wound to TA on wire. The other ones are maintained in a ta_onwireq which is then processed after we get the response from TA. 
Change-Id: I32c7b61a61776663601ab0040e2f0767eca1fd64 updates: bz#1648205 Signed-off-by: Ravishankar N Signed-off-by: Ashish Pandey --- xlators/cluster/afr/src/afr.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'xlators/cluster/afr/src/afr.h') diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 3d2c1950571..6f8015380f0 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -107,8 +107,20 @@ typedef enum { AFR_CHILD_UNKNOWN = -1, AFR_CHILD_ZERO, AFR_CHILD_ONE, + AFR_CHILD_THIN_ARBITER, } afr_child_index; +typedef enum { + TA_WAIT_FOR_NOTIFY_LOCK_REL, /*FOP came after notify domain lock upcall + notification and waiting for its release.*/ + TA_GET_INFO_FROM_TA_FILE, /*FOP needs post-op on ta file to get + *info about which brick is bad.*/ + TA_INFO_IN_MEMORY_SUCCESS, /*Bad brick info is in memory and fop failed + *on BAD brick - Success*/ + TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed + *on GOOD brick - Failed*/ +} afr_ta_fop_state_t; + struct afr_nfsd { gf_boolean_t iamnfsd; uint32_t halo_max_latency_msec; @@ -127,8 +139,14 @@ typedef struct _afr_private { /* For thin-arbiter. 
*/ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/ uuid_t ta_gfid; + unsigned char ta_child_up; int ta_bad_child_index; off_t ta_notify_dom_lock_offset; + gf_boolean_t release_ta_notify_dom_lock; + unsigned int ta_in_mem_txn_count; + unsigned int ta_on_wire_txn_count; + struct list_head ta_waitq; + struct list_head ta_onwireq; unsigned char *child_up; int64_t *child_latency; @@ -855,6 +873,13 @@ typedef struct _afr_local { gf_boolean_t is_read_txn; afr_inode_ctx_t *inode_ctx; + + /*For thin-arbiter transactions.*/ + unsigned char ta_child_up; + struct list_head ta_waitq; + struct list_head ta_onwireq; + afr_ta_fop_state_t fop_state; + int ta_failed_subvol; } afr_local_t; typedef struct afr_spbc_timeout { @@ -1289,4 +1314,10 @@ __afr_get_up_children_count(afr_private_t *priv); call_frame_t * afr_ta_frame_create(xlator_t *this); + +gf_boolean_t +afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local); + +void +afr_ta_lock_release_synctask(xlator_t *this); #endif /* __AFR_H__ */ -- cgit