diff options
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.h')
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 302 |
1 files changed, 302 insertions, 0 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h new file mode 100644 index 00000000000..8ac7dcdac3d --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -0,0 +1,302 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __BIT_ROT_H__ +#define __BIT_ROT_H__ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> +#include <glusterfs/syncop-utils.h> +#include "changelog.h" +#include "timer-wheel.h" + +#include <glusterfs/throttle-tbf.h> +#include "bit-rot-ssm.h" + +#include "bit-rot-common.h" +#include "bit-rot-stub-mem-types.h" +#include "bit-rot-scrub-status.h" + +#include <openssl/sha.h> + +typedef enum scrub_throttle { + BR_SCRUB_THROTTLE_VOID = -1, + BR_SCRUB_THROTTLE_LAZY = 0, + BR_SCRUB_THROTTLE_NORMAL = 1, + BR_SCRUB_THROTTLE_AGGRESSIVE = 2, + BR_SCRUB_THROTTLE_STALLED = 3, +} scrub_throttle_t; + +typedef enum scrub_freq { + BR_FSSCRUB_FREQ_HOURLY = 1, + BR_FSSCRUB_FREQ_DAILY, + BR_FSSCRUB_FREQ_WEEKLY, + BR_FSSCRUB_FREQ_BIWEEKLY, + BR_FSSCRUB_FREQ_MONTHLY, + BR_FSSCRUB_FREQ_MINUTE, + BR_FSSCRUB_FREQ_STALLED, +} scrub_freq_t; + +#define signature_size(hl) (sizeof(br_isignature_t) + hl + 1) + +struct br_scanfs { + gf_lock_t entrylock; + + pthread_mutex_t waitlock; + pthread_cond_t waitcond; + + unsigned int entries; + struct list_head queued; + struct list_head ready; +}; + +/* just need three states to track child status */ +typedef enum br_child_state { + BR_CHILD_STATE_CONNECTED = 1, + BR_CHILD_STATE_INITIALIZING, + BR_CHILD_STATE_CONNFAILED, + BR_CHILD_STATE_DISCONNECTED, +} br_child_state_t; + +struct br_child { + pthread_mutex_t lock; /* protects child state */ + char witnessed; /* witnessed at least one successful + connection */ + br_child_state_t c_state; /* current state of this child */ + + char child_up; /* Indicates whether this child is + up or not */ + xlator_t *xl; /* client xlator corresponding to + this child */ + inode_table_t *table; /* inode table for this child */ + char brick_path[PATH_MAX]; /* brick export directory of this + child */ + struct list_head list; /* hook to attach to the list of + UP children */ + xlator_t *this; /* Bit rot xlator */ + + pthread_t thread; /* initial crawler for unsigned + object(s) or scrub crawler */ + int threadrunning; /* active thread */ + + struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */ + + struct timeval tv; + + struct br_scanfs fsscan; /* per subvolume FS scanner */ + + gf_boolean_t active_scrubbing; /* Actively scrubbing or not */ +}; + +typedef struct br_child br_child_t; + +struct br_obj_n_workers { + struct list_head objects; /* queue of objects expired from the + timer wheel and ready to be picked + up for signing */ + pthread_t *workers; /* Threads which pick up the objects + from the above queue and start + signing each object */ +}; + +struct br_scrubber { + xlator_t *this; + + scrub_throttle_t throttle; + + /** + * frequency of scanning for this subvolume. this should + * normally be per-child, but since all children follow the + * same frequency for a volume, this option ends up here + * instead of br_child_t. + */ + scrub_freq_t frequency; + + gf_boolean_t frequency_reconf; + gf_boolean_t throttle_reconf; + + pthread_mutex_t mutex; + pthread_cond_t cond; + + unsigned int nr_scrubbers; + struct list_head scrubbers; + + /** + * list of "rotatable" subvolume(s) undergoing scrubbing + */ + struct list_head scrublist; +}; + +struct br_monitor { + gf_lock_t lock; + pthread_t thread; /* Monitor thread */ + + gf_boolean_t inited; + pthread_mutex_t mutex; + pthread_cond_t cond; /* Thread starts and will be waiting on cond. + First child which is up wakes this up */ + + xlator_t *this; + /* scheduler */ + uint32_t boot; + + int32_t active_child_count; /* Number of children currently scrubbing */ + gf_boolean_t kick; /* This variable tracks the scrubber is + * kicked or not. Both 'kick' and + * 'active_child_count' uses the same pair + * of mutex-cond variable, i.e, wakelock and + * wakecond. */ + + pthread_mutex_t wakelock; + pthread_cond_t wakecond; + + gf_boolean_t done; + pthread_mutex_t donelock; + pthread_cond_t donecond; + + struct gf_tw_timer_list *timer; + br_scrub_state_t state; /* current scrub state */ +}; + +typedef struct br_obj_n_workers br_obj_n_workers_t; + +typedef struct br_private br_private_t; + +typedef void (*br_scrubbed_file_update)(br_private_t *priv); + +struct br_private { + pthread_mutex_t lock; + + struct list_head bricks; /* list of bricks from which enents + have been received */ + + struct list_head signing; + + pthread_cond_t object_cond; /* handling signing of objects */ + int child_count; + br_child_t *children; /* list of subvolumes */ + int up_children; + + pthread_cond_t cond; /* handling CHILD_UP notifications */ + pthread_t thread; /* thread for connecting each UP + child with changelog */ + + struct tvec_base *timer_wheel; /* timer wheel where the objects which + changelog has sent sits and waits + for expiry */ + br_obj_n_workers_t *obj_queue; /* place holder for all the objects + that are expired from timer wheel + and ready to be picked up for + signing and the workers which sign + the objects */ + + uint32_t expiry_time; /* objects "wait" time */ + + uint32_t signer_th_count; /* Number of signing process threads */ + + tbf_t *tbf; /* token bucket filter */ + + gf_boolean_t iamscrubber; /* function as a fs scrubber */ + + struct br_scrub_stats scrub_stat; /* statistics of scrub*/ + + struct br_scrubber fsscrub; /* scrubbers for this subvolume */ + + struct br_monitor scrub_monitor; /* scrubber monitor */ +}; + +struct br_object { + xlator_t *this; + + uuid_t gfid; + + unsigned long signedversion; /* version against which this object will + be signed */ + br_child_t *child; /* object's subvolume */ + + int sign_info; + + struct list_head list; /* hook to add to the queue once the + object is expired from timer wheel */ + void *data; +}; + +typedef struct br_object br_object_t; +typedef int32_t(br_scrub_ssm_call)(xlator_t *); + +void +br_log_object(xlator_t *, char *, uuid_t, int32_t); + +void +br_log_object_path(xlator_t *, char *, const char *, int32_t); + +int32_t +br_calculate_obj_checksum(unsigned char *, br_child_t *, fd_t *, struct iatt *); + +int32_t +br_prepare_loc(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *); + +gf_boolean_t +bitd_is_bad_file(xlator_t *, br_child_t *, loc_t *, fd_t *); + +static inline void +_br_set_child_state(br_child_t *child, br_child_state_t state) +{ + child->c_state = state; +} + +static inline int +_br_is_child_connected(br_child_t *child) +{ + return (child->c_state == BR_CHILD_STATE_CONNECTED); +} + +static inline int +_br_is_child_scrub_active(br_child_t *child) +{ + return child->active_scrubbing; +} + +static inline int +_br_child_failed_conn(br_child_t *child) +{ + return (child->c_state == BR_CHILD_STATE_CONNFAILED); +} + +static inline int +_br_child_witnessed_connection(br_child_t *child) +{ + return (child->witnessed == 1); +} + +/* scrub state */ +static inline void +_br_monitor_set_scrub_state(struct br_monitor *scrub_monitor, + br_scrub_state_t state) +{ + scrub_monitor->state = state; +} + +static inline br_scrub_event_t +_br_child_get_scrub_event(struct br_scrubber *fsscrub) +{ + return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED) + ? BR_SCRUB_EVENT_PAUSE + : BR_SCRUB_EVENT_SCHEDULE; +} + +int32_t +br_get_bad_objects_list(xlator_t *this, dict_t **dict); + +#endif /* __BIT_ROT_H__ */ |
