From 2874ee4d65185a607f1f646fc88ba8eb400aaa9a Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Thu, 4 Jun 2015 08:50:48 +0530 Subject: features/bitrot: cleanup, v2 Backport of http://review.gluster.org/11148 This patch uses "cleanup, v1" infrastrcuture to cleanup scrubber (data structures, threads, timers, etc..) on brick disconnection. Signer is not cleaned up yet: probably would be done as part of another patch. Change-Id: I78a92b8a7f02b2f39078aa9a5a6b101fc499fd70 BUG: 1226666 Signed-off-by: Venky Shankar Reviewed-on: http://review.gluster.org/11540 Reviewed-by: Raghavendra Bhat Tested-by: NetBSD Build System Tested-by: Gluster Build System --- xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 27 +++- xlators/features/bit-rot/src/bitd/bit-rot.c | 186 ++++++++++++++++++---- xlators/features/bit-rot/src/bitd/bit-rot.h | 12 +- 3 files changed, 187 insertions(+), 38 deletions(-) (limited to 'xlators/features/bit-rot') diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 45499debc14..d6ee4138fd2 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -392,6 +392,14 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) return ret; } +static void +_br_lock_cleaner (void *arg) +{ + pthread_mutex_t *mutex = arg; + + pthread_mutex_unlock (mutex); +} + static void wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) { @@ -401,8 +409,10 @@ wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) priv = this->private; fsscrub = &priv->fsscrub; + pthread_cleanup_push (_br_lock_cleaner, &fsscan->waitlock); pthread_mutex_lock (&fsscan->waitlock); { + pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); pthread_mutex_lock (&fsscrub->mutex); { list_replace_init (&fsscan->queued, &fsscan->ready); @@ -411,12 +421,14 @@ wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) pthread_cond_broadcast (&fsscrub->cond); } pthread_mutex_unlock (&fsscrub->mutex); + pthread_cleanup_pop (0); while (fsscan->entries != 0) pthread_cond_wait (&fsscan->waitcond, &fsscan->waitlock); } pthread_mutex_unlock (&fsscan->waitlock); + pthread_cleanup_pop (0); } static inline void @@ -465,6 +477,8 @@ br_fsscanner_handle_entry (xlator_t *subvol, this = child->this; fsscan = &child->fsscan; + _mask_cancellation (); + fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); if (!fsentry) goto error_return; @@ -499,6 +513,8 @@ br_fsscanner_handle_entry (xlator_t *subvol, } UNLOCK (&fsscan->entrylock); + _unmask_cancellation (); + if (scrub) wait_for_scrubbing (this, fsscan); @@ -535,6 +551,7 @@ br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) static void br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan) { + pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock); pthread_mutex_lock (&fsscan->wakelock); { while (!fsscan->kick) @@ -543,6 +560,7 @@ br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan) fsscan->kick = _gf_false; } pthread_mutex_unlock (&fsscan->wakelock); + pthread_cleanup_pop (0); } void * @@ -778,13 +796,6 @@ br_scrubber_calc_scale (xlator_t *this, } -static void -br_scrubber_cleanup_handler (void *arg) -{ - struct br_scrubber *fsscrub = arg; - pthread_mutex_unlock (&fsscrub->mutex); -} - static inline br_child_t * _br_scrubber_get_next_child (struct br_scrubber *fsscrub) { @@ -844,7 +855,7 @@ static void br_scrubber_pick_entry (struct br_scrubber *fsscrub, struct br_fsscan_entry **fsentry) { - pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub); + pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); pthread_mutex_lock (&fsscrub->mutex); { diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 4e86f5071ed..2e50268b1fd 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1103,7 +1103,7 @@ br_set_child_state (br_child_t *child, br_child_state_t state) * Also, we register to the changelog library to subscribe for event * notifications. */ -static inline int32_t +static int32_t br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) { int32_t ret = 0; @@ -1145,32 +1145,16 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) return -1; } -static inline int32_t -br_enact_scrubber (xlator_t *this, br_child_t *child) +static int32_t +br_launch_scrubber (xlator_t *this, br_child_t *child, + struct br_scanfs *fsscan, struct br_scrubber *fsscrub) { - int32_t ret = 0; + int32_t ret = -1; br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - LOCK_INIT (&fsscan->entrylock); - pthread_mutex_init (&fsscan->waitlock, NULL); - pthread_cond_init (&fsscan->waitcond, NULL); - - fsscan->entries = 0; - INIT_LIST_HEAD (&fsscan->queued); - INIT_LIST_HEAD (&fsscan->ready); - - /* init scheduler related variables */ fsscan->kick = _gf_false; - pthread_mutex_init (&fsscan->wakelock, NULL); - pthread_cond_init (&fsscan->wakecond, NULL); - ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); if (ret != 0) { gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, @@ -1186,7 +1170,7 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) } pthread_mutex_unlock (&priv->lock); if (ret) - goto error_return; + goto cleanup_thread; /** * Everything has been setup.. add this subvolume to scrubbers @@ -1201,6 +1185,50 @@ br_enact_scrubber (xlator_t *this, br_child_t *child) return 0; + cleanup_thread: + (void) gf_thread_cleanup_xint (child->thread); + error_return: + return -1; +} + +static int32_t +br_enact_scrubber (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + /** + * if this child already witnesses a successfull connection earlier + * there's no need to initialize mutexes, condvars, etc.. + */ + if (_br_child_witnessed_connection (child)) + return br_launch_scrubber (this, child, fsscan, fsscrub); + + LOCK_INIT (&fsscan->entrylock); + pthread_mutex_init (&fsscan->waitlock, NULL); + pthread_cond_init (&fsscan->waitcond, NULL); + + fsscan->entries = 0; + INIT_LIST_HEAD (&fsscan->queued); + INIT_LIST_HEAD (&fsscan->ready); + + /* init scheduler related variables */ + pthread_mutex_init (&fsscan->wakelock, NULL); + pthread_cond_init (&fsscan->wakecond, NULL); + + ret = br_launch_scrubber (this, child, fsscan, fsscrub); + if (ret) + goto error_return; + + return 0; + error_return: LOCK_DESTROY (&fsscan->entrylock); pthread_mutex_destroy (&fsscan->waitlock); @@ -1223,6 +1251,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) ret = br_enact_signer (this, child, stub); if (!ret) { + child->witnessed = 1; _br_set_child_state (child, BR_CHILD_STATE_CONNECTED); gf_log (this->name, GF_LOG_INFO, "Connected to brick %s..", child->brick_path); @@ -1304,6 +1333,100 @@ br_brick_connect (xlator_t *this, br_child_t *child) return ret; } +/* TODO: cleanup signer */ +static int32_t +br_cleanup_signer (xlator_t *this, br_child_t *child) +{ + return 0; +} + +static int32_t +br_cleanup_scrubber (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + /** + * 0x0: child (brick) goes out of rotation + * + * This is fully safe w.r.t. entries for this child being actively + * scrubbed. Each of the scrubber thread(s) would finish scrubbing + * the entry (probably failing due to disconnection) and either + * putting the entry back into the queue or continuing further. + * Either way, pending entries for this child's queue need not be + * drained; entries just sit there in the queued/ready list to be + * consumed later upon re-connection. + */ + pthread_mutex_lock (&fsscrub->mutex); + { + list_del_init (&child->list); + } + pthread_mutex_unlock (&fsscrub->mutex); + + /** + * 0x1: cleanup scanner thread + * + * The pending timer needs to be removed _after_ cleaning up the + * filesystem scanner (scheduling the next scrub time is not a + * cancellation point). + */ + ret = gf_thread_cleanup_xint (child->thread); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error cleaning up scanner thread"); + + /** + * 0x2: free()up resources + */ + if (fsscan->timer) { + (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer); + + GF_FREE (fsscan->timer); + fsscan->timer = NULL; + } + + gf_log (this->name, GF_LOG_INFO, + "Cleaned up scrubber for brick [%s]", child->brick_path); + + return 0; +} + +/** + * OK.. this child has made it's mind to go down the drain. So, + * let's clean up what it touched. (NOTE: there's no need to clean + * the inode table, it's just reused taking care of stale inodes) + */ +int32_t +br_brick_disconnect (xlator_t *this, br_child_t *child) +{ + int32_t ret = 0; + br_private_t *priv = this->private; + + LOCK (&child->lock); + { + if (!_br_is_child_connected (child)) + goto unblock; + + /* child is on death row.. */ + _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); + + if (priv->iamscrubber) + ret = br_cleanup_scrubber (this, child); + else + ret = br_cleanup_signer (this, child); + } + unblock: + UNLOCK (&child->lock); + + return ret; +} + /** * This function is executed in a separate thread. The thread gets the * brick from where CHILD_UP has received from the queue and gets the @@ -1424,7 +1547,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) { child = &priv->children[idx]; if (child->child_up == 1) - goto unblock; + goto unblock_0; priv->up_children++; child->child_up = 1; @@ -1435,7 +1558,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) _br_qchild_event (this, child, br_brick_connect); pthread_cond_signal (&priv->cond); } - unblock: + unblock_0: pthread_mutex_unlock (&priv->lock); if (priv->up_children == priv->child_count) @@ -1452,11 +1575,17 @@ notify (xlator_t *this, int32_t event, void *data, ...) pthread_mutex_lock (&priv->lock); { - if (priv->children[idx].child_up == 1) { - priv->children[idx].child_up = 0; - priv->up_children--; - } + child = &priv->children[idx]; + if (child->child_up == 0) + goto unblock_1; + + child->child_up = 0; + priv->up_children--; + + _br_qchild_event (this, child, br_brick_disconnect); + pthread_cond_signal (&priv->cond); } + unblock_1: pthread_mutex_unlock (&priv->lock); if (priv->up_children == 0) @@ -1649,6 +1778,7 @@ br_init_children (xlator_t *this, br_private_t *priv) child = &priv->children[i]; LOCK_INIT (&child->lock); + child->witnessed = 0; br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); child->this = this; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index b8d9e3b1183..9a55773cc87 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -85,8 +85,10 @@ typedef enum br_child_state { } br_child_state_t; struct br_child { - gf_lock_t lock; - br_child_state_t c_state; + gf_lock_t lock; /* protects child state */ + char witnessed; /* witnessed at least one succesfull + connection */ + br_child_state_t c_state; /* current state of this child */ char child_up; /* Indicates whether this child is up or not */ @@ -236,4 +238,10 @@ _br_child_failed_conn (br_child_t *child) return (child->c_state == BR_CHILD_STATE_CONNFAILED); } +static inline int +_br_child_witnessed_connection (br_child_t *child) +{ + return (child->witnessed == 1); +} + #endif /* __BIT_ROT_H__ */ -- cgit