summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot.c 364
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot.h 32
-rw-r--r-- xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h 1
3 files changed, 271 insertions, 126 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 4b698fc..7a0a1b5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -25,6 +25,18 @@
#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+typedef int32_t (br_child_handler)(xlator_t *, br_child_t *);
+
+struct br_child_event {
+ xlator_t *this;
+
+ br_child_t *child;
+
+ br_child_handler *call;
+
+ struct list_head list;
+};
+
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -49,26 +61,6 @@ out:
return index;
}
-static void
-br_free_children (xlator_t *this)
-{
- br_private_t *priv = NULL;
- int32_t i = 0;
- br_child_t *child = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- child = &priv->children[i];
- mem_pool_destroy (child->timer_pool);
- list_del_init (&priv->children[i].list);
- }
-
- GF_FREE (priv->children);
-
- priv->children = NULL;
-}
-
br_child_t *
br_get_child_from_brick_path (xlator_t *this, char *brick_path)
{
@@ -1085,6 +1077,16 @@ br_oneshot_signer (void *arg)
return NULL;
}
+static void
+br_set_child_state (br_child_t *child, br_child_state_t state)
+{
+ LOCK (&child->lock);
+ {
+ _br_set_child_state (child, state);
+ }
+ UNLOCK (&child->lock);
+}
+
/**
* At this point a thread is spawned to crawl the filesystem (in
* tortoise pace) to sign objects that were not signed in previous run(s).
@@ -1172,7 +1174,12 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)
goto error_return;
}
- ret = br_fsscan_schedule (this, child, fsscan, fsscrub);
+ /* this needs to be serialized with reconfigure() */
+ pthread_mutex_lock (&priv->lock);
+ {
+ ret = br_fsscan_schedule (this, child, fsscan, fsscrub);
+ }
+ pthread_mutex_unlock (&priv->lock);
if (ret)
goto error_return;
@@ -1197,6 +1204,30 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)
return -1;
}
+static int32_t
+br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+ int32_t ret = -1;
+ br_private_t *priv = this->private;
+
+ LOCK (&child->lock);
+ {
+ if (priv->iamscrubber)
+ ret = br_enact_scrubber (this, child);
+ else
+ ret = br_enact_signer (this, child, stub);
+
+ if (!ret) {
+ _br_set_child_state (child, BR_CHILD_STATE_CONNECTED);
+ gf_log (this->name, GF_LOG_INFO,
+ "Connected to brick %s..", child->brick_path);
+ }
+ }
+ UNLOCK (&child->lock);
+
+ return ret;
+}
+
/**
* This routine fetches various attributes associated with a child which
* is basically a subvolume. Attributes include brick path and the stub
@@ -1204,7 +1235,7 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)
* by getxattr() on a virtual key. Depending on the configuration, the
* process either acts as a signer or a scrubber.
*/
-static inline int32_t
+int32_t
br_brick_connect (xlator_t *this, br_child_t *child)
{
int32_t ret = -1;
@@ -1213,14 +1244,13 @@ br_brick_connect (xlator_t *this, br_child_t *child)
struct iatt parent = {0, };
br_stub_init_t *stub = NULL;
dict_t *xattr = NULL;
- br_private_t *priv = NULL;
int op_errno = 0;
GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
GF_VALIDATE_OR_GOTO (this->name, child, out);
GF_VALIDATE_OR_GOTO (this->name, this->private, out);
- priv = this->private;
+ br_set_child_state (child, BR_CHILD_STATE_INITIALIZING);
loc.inode = inode_ref (child->table->root);
gf_uuid_copy (loc.gfid, loc.inode->gfid);
@@ -1257,20 +1287,15 @@ br_brick_connect (xlator_t *this, br_child_t *child)
child->tv.tv_sec = ntohl (stub->timebuf[0]);
child->tv.tv_usec = ntohl (stub->timebuf[1]);
- if (priv->iamscrubber)
- ret = br_enact_scrubber (this, child);
- else
- ret = br_enact_signer (this, child, stub);
-
- if (!ret)
- gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
- "Connected to brick %s..", child->brick_path);
+ ret = br_child_enaction (this, child, stub);
free_dict:
dict_unref (xattr);
wipeloc:
loc_wipe (&loc);
out:
+ if (ret)
+ br_set_child_state (child, BR_CHILD_STATE_CONNFAILED);
return ret;
}
@@ -1285,7 +1310,8 @@ br_handle_events (void *arg)
int32_t ret = 0;
xlator_t *this = NULL;
br_private_t *priv = NULL;
- br_child_t *child = NULL;
+ br_child_t *child = NULL;
+ struct br_child_event *childev = NULL;
this = arg;
priv = this->private;
@@ -1304,17 +1330,20 @@ br_handle_events (void *arg)
while (list_empty (&priv->bricks))
pthread_cond_wait (&priv->cond, &priv->lock);
- child = list_first_entry
- (&priv->bricks, br_child_t, list);
- list_del_init (&child->list);
+ childev = list_first_entry
+ (&priv->bricks, struct br_child_event, list);
+ list_del_init (&childev->list);
}
pthread_mutex_unlock (&priv->lock);
- ret = br_brick_connect (this, child);
+ child = childev->child;
+ ret = childev->call (this, child);
if (ret)
gf_msg (this->name, GF_LOG_ERROR, 0,
- BRB_MSG_SUBVOL_CONNECT_FAILED, "failed to "
- "connect to subvolume %s", child->xl->name);
+ BRB_MSG_SUBVOL_CONNECT_FAILED,
+ "callback handler for subvolume [%s] failed",
+ child->xl->name);
+ GF_FREE (childev);
}
return NULL;
@@ -1339,6 +1368,29 @@ mem_acct_init (xlator_t *this)
return ret;
}
+static void
+_br_qchild_event (xlator_t *this, br_child_t *child, br_child_handler *call)
+{
+ br_private_t *priv = NULL;
+ struct br_child_event *childev = NULL;
+
+ priv = this->private;
+
+ childev = GF_CALLOC (1, sizeof (*childev), gf_br_mt_br_child_event_t);
+ if (!childev) {
+ gf_log (this->name, GF_LOG_ERROR, "Event unhandled for "
+ "child.. [Brick: %s]", child->xl->name);
+ return;
+ }
+
+ INIT_LIST_HEAD (&childev->list);
+ childev->this = this;
+ childev->child = child;
+ childev->call = call;
+
+ list_add_tail (&childev->list, &priv->bricks);
+}
+
int
notify (xlator_t *this, int32_t event, void *data, ...)
{
@@ -1368,14 +1420,14 @@ notify (xlator_t *this, int32_t event, void *data, ...)
child = &priv->children[idx];
if (child->child_up == 1)
goto unblock;
+ priv->up_children++;
child->child_up = 1;
child->xl = subvol;
- child->table = inode_table_new (4096, subvol);
+ if (!child->table)
+ child->table = inode_table_new (4096, subvol);
- priv->up_children++;
-
- list_add_tail (&child->list, &priv->bricks);
+ _br_qchild_event (this, child, br_brick_connect);
pthread_cond_signal (&priv->cond);
}
unblock:
@@ -1405,6 +1457,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (priv->up_children == 0)
default_notify (this, event, data);
break;
+
default:
default_notify (this, event, data);
}
@@ -1558,59 +1611,94 @@ br_signer_init (xlator_t *this, br_private_t *priv)
}
-int32_t
-init (xlator_t *this)
+static void
+br_free_children (xlator_t *this, br_private_t *priv, int count)
{
- int i = 0;
- int32_t ret = -1;
- br_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
+ br_child_t *child = NULL;
- if (!this->children) {
- gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD,
- "FATAL: no children");
- goto out;
+ for (--count; count >= 0; count--) {
+ child = &priv->children[count];
+ mem_pool_destroy (child->timer_pool);
+ LOCK_DESTROY (&child->lock);
}
- priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t);
- if (!priv) {
- gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate memory (->priv)");
- goto out;
- }
+ GF_FREE (priv->children);
+ priv->children = NULL;
+}
- GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
+static int
+br_init_children (xlator_t *this, br_private_t *priv)
+{
+ int i = 0;
+ br_child_t *child = NULL;
+ xlator_list_t *trav = NULL;
priv->child_count = xlator_subvolume_count (this);
priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
gf_br_mt_br_child_t);
if (!priv->children)
- goto free_priv;
+ goto err;
trav = this->children;
while (trav) {
- priv->children[i].this = this;
- priv->children[i].xl = trav->xlator;
-
- priv->children[i].timer_pool =
- mem_pool_new (struct gf_tw_timer_list, 4096);
- if (!priv->children[i].timer_pool) {
- gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
- BRB_MSG_NO_MEMORY, "failed to allocate mem-pool"
- " for timer");
+ child = &priv->children[i];
+
+ LOCK_INIT (&child->lock);
+ br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);
+
+ child->this = this;
+ child->xl = trav->xlator;
+
+ child->timer_pool = mem_pool_new
+ (struct gf_tw_timer_list, 4096);
+ if (!child->timer_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate mem-pool for timer");
errno = ENOMEM;
- goto free_children;
+ goto freechild;
}
+ INIT_LIST_HEAD (&child->list);
+
i++;
trav = trav->next;
}
+ return 0;
+
+ freechild:
+ br_free_children (this, priv, i);
+ err:
+ return -1;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ int32_t ret = -1;
+ br_private_t *priv = NULL;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR, "FATAL: no children");
+ goto out;
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t);
+ if (!priv) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate memory (->priv)");
+ goto out;
+ }
+
+ GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
+
+ ret = br_init_children (this, priv);
+ if (ret)
+ goto free_priv;
+
pthread_mutex_init (&priv->lock, NULL);
pthread_cond_init (&priv->cond, NULL);
- for (i = 0; i < priv->child_count; i++)
- INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
INIT_LIST_HEAD (&priv->signing);
@@ -1619,7 +1707,7 @@ init (xlator_t *this)
gf_msg (this->name, GF_LOG_ERROR, 0,
BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
"global timer wheel unavailable");
- goto cleanup_mutex;
+ goto cleanup;
}
this->private = priv;
@@ -1635,7 +1723,7 @@ init (xlator_t *this)
}
if (ret)
- goto cleanup_mutex;
+ goto cleanup;
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
if (ret != 0) {
@@ -1651,16 +1739,12 @@ init (xlator_t *this)
return 0;
}
- cleanup_mutex:
+ cleanup:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);
- free_children:
- for (i = 0; i < priv->child_count; i++) {
- if (priv->children[i].timer_pool)
- mem_pool_destroy (priv->children[i].timer_pool);
- }
- GF_FREE (priv->children);
+ br_free_children (this, priv, priv->child_count);
+
free_priv:
GF_FREE (priv);
out:
@@ -1678,7 +1762,7 @@ fini (xlator_t *this)
if (!priv->iamscrubber)
br_fini_signer (this, priv);
- br_free_children (this);
+ br_free_children (this, priv, priv->child_count);
this->private = NULL;
GF_FREE (priv);
@@ -1686,64 +1770,96 @@ fini (xlator_t *this)
return;
}
-int
-reconfigure (xlator_t *this, dict_t *options)
+static void
+br_reconfigure_child (xlator_t *this,
+ br_child_t *child, struct br_scrubber *fsscrub)
{
- int i = 0;
- int32_t ret = -1;
- br_child_t *child = NULL;
- br_private_t *priv = NULL;
- struct br_scanfs *fsscan = NULL;
+ int32_t ret = 0;
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ ret = br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not reschedule scrubber for brick: %s. "
+ "Scubbing will continue according to old frequency.",
+ child->brick_path);
+ }
+}
+
+static int
+br_reconfigure_scrubber (xlator_t *this, dict_t *options)
+{
+ int i = 0;
+ int32_t ret = -1;
+ br_child_t *child = NULL;
+ br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
priv = this->private;
+ fsscrub = &priv->fsscrub;
- if (!priv->iamscrubber) {
- ret = br_signer_handle_options (this, priv, options);
- if (ret)
- goto err;
- return 0;
+ pthread_mutex_lock (&priv->lock);
+ {
+ ret = br_scrubber_handle_options (this, priv, options);
}
+ pthread_mutex_unlock (&priv->lock);
- ret = br_scrubber_handle_options (this, priv, options);
if (ret)
goto err;
- fsscrub = &priv->fsscrub;
-
/* reschedule all _up_ subvolume(s) */
- pthread_mutex_lock (&priv->lock);
- {
- for (; i < priv->child_count; i++) {
- child = &priv->children[i];
- if (!child->child_up) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- BRB_MSG_BRICK_INFO, "Brick %s is "
- "offline, skipping rescheduling (scrub"
- " would auto- schedule when brick is "
- "back online).", child->brick_path);
- continue;
- }
+ for (; i < priv->child_count; i++) {
+ child = &priv->children[i];
- fsscan = &child->fsscan;
- ret = br_fsscan_reschedule (this, child,
- fsscan, fsscrub, _gf_true);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "Could not reschedule scrubber for "
- "brick: %s. Scubbing will continue "
- "according to old frequency.",
- child->brick_path);
+ LOCK (&child->lock);
+ {
+ if (_br_child_failed_conn (child)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Scrubber for brick [%s] failed "
+ "initialization, rescheduling is "
+ "skipped", child->brick_path);
+ goto unblock;
}
+
+ if (_br_is_child_connected (child))
+ br_reconfigure_child (this, child, fsscrub);
+
+ /**
+ * for the rest.. either the child is in initialization
+ * phase or is disconnected. either way, updated values
+ * would be reflected on successful connection.
+ */
}
+ unblock:
+ UNLOCK (&child->lock);
}
- pthread_mutex_unlock (&priv->lock);
-
- return 0;
err:
- return -1;
+ return ret;
+}
+
+static int
+br_reconfigure_signer (xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ return br_signer_handle_options (this, priv, options);
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ br_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->iamscrubber)
+ ret = br_reconfigure_scrubber (this, options);
+ else
+ ret = br_reconfigure_signer (this, options);
+
+ return ret;
}
struct xlator_fops fops;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 7be4398..d4742f4 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -71,7 +71,18 @@ struct br_scanfs {
struct gf_tw_timer_list *timer;
};
+/* just need four states to track child status */
+typedef enum br_child_state {
+ BR_CHILD_STATE_CONNECTED = 1,
+ BR_CHILD_STATE_INITIALIZING,
+ BR_CHILD_STATE_CONNFAILED,
+ BR_CHILD_STATE_DISCONNECTED,
+} br_child_state_t;
+
struct br_child {
+ gf_lock_t lock;
+ br_child_state_t c_state;
+
char child_up; /* Indicates whether this child is
up or not */
xlator_t *xl; /* client xlator corresponding to
@@ -135,8 +146,8 @@ typedef struct br_obj_n_workers br_obj_n_workers_t;
struct br_private {
pthread_mutex_t lock;
- struct list_head bricks; /* list of bricks from which CHILD_UP
- has been received */
+ struct list_head bricks; /* list of bricks from which events
+ have been received */
struct list_head signing;
@@ -202,5 +213,22 @@ br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
gf_boolean_t
bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *);
+static inline void
+_br_set_child_state (br_child_t *child, br_child_state_t state)
+{
+ child->c_state = state;
+}
+
+static inline int
+_br_is_child_connected (br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNECTED);
+}
+
+static inline int
+_br_child_failed_conn (br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNFAILED);
+}
#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index fbb69ce..f70fafb 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -31,6 +31,7 @@ enum br_mem_types {
gf_br_stub_mt_br_stub_fd_t,
gf_br_stub_mt_br_scanner_freq_t,
gf_br_stub_mt_sigstub_t,
+ gf_br_mt_br_child_event_t,
gf_br_stub_mt_end,
};