summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h18
-rw-r--r--xlators/cluster/afr/src/afr.h258
2 files changed, 138 insertions, 138 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 19905394540..687c28e6472 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -14,12 +14,11 @@
#include <pthread.h>
typedef struct {
- int child;
char *path;
+ int child;
} shd_event_t;
typedef struct {
- int child;
uint64_t healed_count;
uint64_t split_brain_count;
uint64_t heal_failed_count;
@@ -31,32 +30,33 @@ typedef struct {
cralwer is in progress */
time_t end_time;
char *crawl_type;
+ int child;
} crawl_event_t;
struct subvol_healer {
xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
crawl_event_t crawl_event;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
};
typedef struct {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
struct subvol_healer *index_healers;
struct subvol_healer *full_healers;
eh_t *split_brain;
eh_t **statistics;
+ int timeout;
uint32_t max_threads;
uint32_t wait_qlength;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
} afr_self_heald_t;
int
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index a3f2942b317..f86f019e637 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -139,8 +139,8 @@ typedef enum {
} afr_ta_fop_state_t;
struct afr_nfsd {
- gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
};
typedef struct _afr_private {
@@ -153,14 +153,13 @@ typedef struct _afr_private {
inode_t *root_inode;
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
/* For thin-arbiter. */
- unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
uuid_t ta_gfid;
- unsigned char ta_child_up;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
int ta_bad_child_index;
int ta_event_gen;
- off_t ta_notify_dom_lock_offset;
- gf_boolean_t release_ta_notify_dom_lock;
unsigned int ta_in_mem_txn_count;
unsigned int ta_on_wire_txn_count;
struct list_head ta_waitq;
@@ -187,30 +186,31 @@ typedef struct _afr_private {
int32_t healers; /* No. of elements currently undergoing background
heal*/
+ gf_boolean_t release_ta_notify_dom_lock;
+
gf_boolean_t metadata_self_heal; /* on/off */
gf_boolean_t entry_self_heal; /* on/off */
gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
- afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
- gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
+ gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
- afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
- resolution of split-brains.*/
+ gf_timer_t *timer; /* launched when parent up is received */
unsigned int wait_count; /* # of servers to wait for success */
- gf_timer_t *timer; /* launched when parent up is received */
-
+ unsigned char ta_child_up;
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
gf_boolean_t pre_op_compat; /* on/off */
uint32_t post_op_delay_secs;
unsigned int quorum_count;
- char vol_uuid[UUID_SIZE + 1];
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
int32_t *last_event;
/* @event_generation: Keeps count of number of events received which can
@@ -223,27 +223,28 @@ typedef struct _afr_private {
important as we might have had a network split brain.
*/
uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
gf_boolean_t choose_local;
gf_boolean_t did_discovery;
- uint64_t sh_readdir_size;
gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
char *sh_domain;
char *afr_dirty;
- gf_boolean_t halo_enabled;
- uint32_t halo_max_latency_msec;
- uint32_t halo_max_replicas;
- uint32_t halo_min_replicas;
+ uint64_t spb_choice_timeout;
afr_self_heald_t shd;
struct afr_nfsd nfsd;
- gf_boolean_t consistent_metadata;
- uint64_t spb_choice_timeout;
- gf_boolean_t need_heal;
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
- gf_boolean_t granular_locks;
gf_boolean_t full_lock;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
@@ -311,18 +312,17 @@ afr_entry_lockee_cmp(const void *l1, const void *l2);
typedef struct {
loc_t *lk_loc;
- int lockee_count;
afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
- char lower_locked;
- char higher_locked;
unsigned char *lower_locked_nodes;
- int32_t lock_count;
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
int32_t lk_call_count;
int32_t lk_expected_count;
@@ -330,14 +330,15 @@ typedef struct {
int32_t lock_op_ret;
int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
} afr_internal_lock_t;
struct afr_reply {
int valid;
int32_t op_ret;
- int32_t op_errno;
dict_t *xattr; /*For xattrop*/
dict_t *xdata;
struct iatt poststat;
@@ -346,6 +347,7 @@ struct afr_reply {
struct iatt preparent;
struct iatt preparent2;
struct iatt postparent2;
+ int32_t op_errno;
/* For rchecksum */
uint8_t checksum[SHA256_DIGEST_LENGTH];
gf_boolean_t buf_has_zeroes;
@@ -385,8 +387,6 @@ typedef struct _afr_inode_lock_t {
*/
int32_t num_inodelks;
unsigned int event_generation;
- gf_boolean_t release;
- gf_boolean_t acquired;
gf_timer_t *delay_timer;
struct list_head owners; /*Transactions that are performing fop*/
struct list_head post_op; /*Transactions that are done with the fop
@@ -395,6 +395,8 @@ typedef struct _afr_inode_lock_t {
*conflicting transactions to complete*/
struct list_head frozen; /*Transactions that need to go as part of
* next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
} afr_lock_t;
typedef struct _afr_inode_ctx {
@@ -403,15 +405,11 @@ typedef struct _afr_inode_ctx {
int lock_count;
int spb_choice;
gf_timer_t *timer;
- gf_boolean_t need_refresh;
unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
int inherited[AFR_NUM_CHANGE_LOGS];
int on_disk[AFR_NUM_CHANGE_LOGS];
-
- /* set if any write on this fd was a non stable write
- (i.e, without O_SYNC or O_DSYNC)
- */
- gf_boolean_t witnessed_unstable_write;
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
/* @open_fd_count:
Number of open FDs queried from the server, as queried through
@@ -419,8 +417,12 @@ typedef struct _afr_inode_ctx {
temporarily disabled.
*/
uint32_t open_fd_count;
- /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
- afr_lock_t lock[2];
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
} afr_inode_ctx_t;
typedef struct _afr_local {
@@ -434,19 +436,15 @@ typedef struct _afr_local {
unsigned int event_generation;
uint32_t open_fd_count;
- gf_boolean_t update_open_fd_count;
int32_t num_inodelks;
- gf_boolean_t update_num_inodelks;
-
- gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
- int32_t **pending;
-
int dirty[AFR_NUM_CHANGE_LOGS];
+ int32_t **pending;
+
loc_t loc;
loc_t newloc;
@@ -477,14 +475,6 @@ typedef struct _afr_local {
afr_read_txn_wind_t readfn;
- /* @refreshed:
-
- the inode was "refreshed" (i.e, pending xattrs from all subvols
- freshly inspected and inode ctx updated accordingly) as part of
- this transaction already.
- */
- gf_boolean_t refreshed;
-
/* @inode:
the inode on which the read txn is performed on. ref'ed and copied
@@ -509,8 +499,6 @@ typedef struct _afr_local {
unsigned char *readable;
unsigned char *readable2; /*For rename transaction*/
- int read_subvol; /* Current read subvolume */
-
afr_inode_refresh_cbk_t refreshfn;
/* @refreshinode:
@@ -519,9 +507,30 @@ typedef struct _afr_local {
*/
inode_t *refreshinode;
+ dict_t *xattr_req;
+
+ dict_t *dict;
+
+ int read_subvol; /* Current read subvolume */
+
+ int optimistic_change_log;
+
+ afr_internal_lock_t internal_lock;
+
/*To handle setattr/setxattr on yet to be linked inode from dht*/
uuid_t refreshgfid;
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
+
/*
@pre_op_compat:
@@ -531,14 +540,6 @@ typedef struct _afr_local {
gf_boolean_t pre_op_compat;
- dict_t *xattr_req;
-
- afr_internal_lock_t internal_lock;
-
- dict_t *dict;
-
- int optimistic_change_log;
-
/* Is the current writev() going to perform a stable write?
i.e, is fd->flags or @flags writev param have O_SYNC or
O_DSYNC?
@@ -557,25 +558,20 @@ typedef struct _afr_local {
struct {
struct {
- gf_boolean_t needs_fresh_lookup;
- uuid_t gfid_req;
- } lookup;
-
- struct {
- unsigned char buf_set;
struct statvfs buf;
+ unsigned char buf_set;
} statfs;
struct {
- int32_t flags;
fd_t *fd;
+ int32_t flags;
} open;
struct {
- int32_t cmd;
struct gf_flock user_flock;
struct gf_flock ret_flock;
unsigned char *locked_nodes;
+ int32_t cmd;
} lk;
/* inode read */
@@ -600,8 +596,8 @@ typedef struct _afr_local {
struct {
char *name;
- int last_index;
long xattr_len;
+ int last_index;
} getxattr;
struct {
@@ -614,11 +610,10 @@ typedef struct _afr_local {
/* dir read */
struct {
+ uint32_t *checksum;
int success_count;
int32_t op_ret;
int32_t op_errno;
-
- uint32_t *checksum;
} opendir;
struct {
@@ -627,8 +622,8 @@ typedef struct _afr_local {
size_t size;
off_t offset;
dict_t *dict;
- gf_boolean_t failed;
int last_index;
+ gf_boolean_t failed;
} readdir;
/* inode write */
@@ -638,12 +633,11 @@ typedef struct _afr_local {
} inode_wfop; // common structure for all inode-write-fops
struct {
- int32_t op_ret;
-
struct iovec *vector;
struct iobref *iobref;
- int32_t count;
off_t offset;
+ int32_t op_ret;
+ int32_t count;
uint32_t flags;
} writev;
@@ -703,29 +697,25 @@ typedef struct _afr_local {
} create;
struct {
+ dict_t *params;
dev_t dev;
mode_t mode;
- dict_t *params;
} mknod;
struct {
- int32_t mode;
dict_t *params;
+ int32_t mode;
} mkdir;
struct {
- int flags;
- } rmdir;
-
- struct {
dict_t *params;
char *linkpath;
} symlink;
struct {
- int32_t mode;
off_t offset;
size_t len;
+ int32_t mode;
} fallocate;
struct {
@@ -752,10 +742,10 @@ typedef struct _afr_local {
struct {
char *volume;
char *basename;
+ void *xdata;
entrylk_cmd in_cmd;
entrylk_cmd cmd;
entrylk_type type;
- void *xdata;
} entrylk;
struct {
@@ -764,31 +754,33 @@ typedef struct _afr_local {
} seek;
struct {
- int32_t datasync;
- } fsync;
-
- struct {
struct gf_lease user_lease;
struct gf_lease ret_lease;
unsigned char *locked_nodes;
} lease;
- } cont;
+ struct {
+ int flags;
+ } rmdir;
- struct {
- off_t start, len;
+ struct {
+ int32_t datasync;
+ } fsync;
- gf_boolean_t eager_lock_on;
- gf_boolean_t do_eager_unlock;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
+
+ } cont;
+ struct {
char *basename;
char *new_basename;
loc_t parent_loc;
loc_t new_parent_loc;
- afr_transaction_type type;
-
/* stub to resume on destruction
of the transaction frame */
call_stub_t *resume_stub;
@@ -806,6 +798,30 @@ typedef struct _afr_local {
FOP failed. */
unsigned char *failed_subvols;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
+
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
+
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
+
+ off_t start, len;
+
+ afr_transaction_type type;
+
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
+
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
+
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
/* @dirtied: flag which indicates whether we set dirty flag
in the OP. Typically true when we are performing operation
on more than one subvol and optimistic changelog is disabled
@@ -830,6 +846,10 @@ typedef struct _afr_local {
*/
gf_boolean_t no_uninherit;
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
/* @uninherit_done:
@uninherit_value:
@@ -842,26 +862,6 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- gf_boolean_t in_flight_sb; /* Indicator for occurrence of
- split-brain while in the middle of
- a txn. */
- int32_t in_flight_sb_errno; /* This is where the cause of the
- failure on the last good copy of
- the file is stored.
- */
-
- /* @changelog_resume: function to be called after changlogging
- (either pre-op or post-op) is done
- */
- afr_changelog_resume_t changelog_resume;
-
- call_frame_t *main_frame; /*Fop frame*/
- call_frame_t *frame; /*Transaction frame*/
-
- int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-
- int (*unwind)(call_frame_t *frame, xlator_t *this);
-
/* post-op hook */
} transaction;
@@ -875,36 +875,36 @@ typedef struct _afr_local {
mode_t umask;
int xflag;
- gf_boolean_t do_discovery;
struct afr_reply *replies;
/* For client side background heals. */
struct list_head healer;
call_frame_t *heal_frame;
- gf_boolean_t need_full_crawl;
- afr_fop_lock_state_t fop_lock_state;
-
- gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
/*For thin-arbiter transactions.*/
- unsigned char read_txn_query_child;
- unsigned char ta_child_up;
+ int ta_failed_subvol;
+ int ta_event_gen;
struct list_head ta_waitq;
struct list_head ta_onwireq;
afr_ta_fop_state_t fop_state;
- int ta_failed_subvol;
- int ta_event_gen;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
gf_boolean_t is_new_entry;
} afr_local_t;
typedef struct afr_spbc_timeout {
call_frame_t *frame;
- gf_boolean_t d_spb;
- gf_boolean_t m_spb;
loc_t *loc;
int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
} afr_spbc_timeout_t;
typedef struct afr_spb_status {
@@ -914,9 +914,9 @@ typedef struct afr_spb_status {
typedef struct afr_empty_brick_args {
call_frame_t *frame;
+ char *op_type;
loc_t loc;
int empty_index;
- char *op_type;
} afr_empty_brick_args_t;
typedef struct afr_read_subvol_args {