summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr.h
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2018-09-23 16:59:58 +0530
committerRavishankar N <ravishankar@redhat.com>2018-10-25 12:26:22 +0000
commit053b1309dc8fbc05fcde5223e734da9f694cf5cc (patch)
treee2eba5c81024b5dc07eef5966289d5e71c3567ee /xlators/cluster/afr/src/afr.h
parentaae1c402b74fd02ed2f6473b896f108d82aef8e3 (diff)
afr: thin-arbiter 2 domain locking and in-memory state
2 domain locking + xattrop for write-txn failures: -------------------------------------------------- - A post-op wound on TA takes AFR_TA_DOM_NOTIFY range lock and AFR_TA_DOM_MODIFY full lock, does xattrop on TA and releases AFR_TA_DOM_MODIFY lock and stores in-memory which brick is bad. - All further write txn failures are handled based on this in-memory value without querying the TA. - When shd heals the files, it does so by requesting full lock on AFR_TA_DOM_NOTIFY domain. Client uses this as a cue (via upcall), releases AFR_TA_DOM_NOTIFY range lock and invalidates its in-memory notion of which brick is bad. The next write txn failure is wound on TA to again update the in-memory state. - Any incomplete write txns before the AFR_TA_DOM_NOTIFY upcall release request is got is completed before the lock is released. - Any write txns got after the release request are maintained in a ta_waitq. - After the release is complete, the ta_waitq elements are spliced to a separate queue which is then processed one by one. - For fops that come in parallel when the in-memory bad brick is still unknown, only one is wound to TA on wire. The other ones are maintained in a ta_onwireq which is then processed after we get the response from TA. Change-Id: I32c7b61a61776663601ab0040e2f0767eca1fd64 updates: bz#1579788 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr.h')
-rw-r--r--xlators/cluster/afr/src/afr.h31
1 files changed, 31 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3d2c1950571..6f8015380f0 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -107,8 +107,20 @@ typedef enum {
AFR_CHILD_UNKNOWN = -1,
AFR_CHILD_ZERO,
AFR_CHILD_ONE,
+ AFR_CHILD_THIN_ARBITER,
} afr_child_index;
+typedef enum {
+ TA_WAIT_FOR_NOTIFY_LOCK_REL, /*FOP came after notify domain lock upcall
+ notification and waiting for its release.*/
+ TA_GET_INFO_FROM_TA_FILE, /*FOP needs post-op on ta file to get
+ *info about which brick is bad.*/
+ TA_INFO_IN_MEMORY_SUCCESS, /*Bad brick info is in memory and fop failed
+ *on BAD brick - Success*/
+ TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
+ *on GOOD brick - Failed*/
+} afr_ta_fop_state_t;
+
struct afr_nfsd {
gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
@@ -127,8 +139,14 @@ typedef struct _afr_private {
/* For thin-arbiter. */
unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
uuid_t ta_gfid;
+ unsigned char ta_child_up;
int ta_bad_child_index;
off_t ta_notify_dom_lock_offset;
+ gf_boolean_t release_ta_notify_dom_lock;
+ unsigned int ta_in_mem_txn_count;
+ unsigned int ta_on_wire_txn_count;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
unsigned char *child_up;
int64_t *child_latency;
@@ -855,6 +873,13 @@ typedef struct _afr_local {
gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
+
+ /*For thin-arbiter transactions.*/
+ unsigned char ta_child_up;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+ afr_ta_fop_state_t fop_state;
+ int ta_failed_subvol;
} afr_local_t;
typedef struct afr_spbc_timeout {
@@ -1289,4 +1314,10 @@ __afr_get_up_children_count(afr_private_t *priv);
call_frame_t *
afr_ta_frame_create(xlator_t *this);
+
+gf_boolean_t
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
+
+void
+afr_ta_lock_release_synctask(xlator_t *this);
#endif /* __AFR_H__ */