author    Jeff Darcy <jdarcy@redhat.com>       2016-12-08 16:24:15 -0500
committer Vijay Bellur <vbellur@redhat.com>    2017-01-30 19:13:58 -0500
commit    1a95fc3036db51b82b6a80952f0908bc2019d24a (patch)
tree      b983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /xlators/protocol/server/src/server.c
parent    7f7d7a939e46b330a084d974451eee4757ba61b4 (diff)
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running in a single brick server process. This reduces our per-brick memory usage by approximately 3x, and our appetite for TCP ports even more. It also creates potential to avoid process/thread thrashing, and to improve QoS by scheduling more carefully across the bricks, but realizing that potential will require further work.

Multiplexing is controlled by the "cluster.brick-multiplex" global option. By default it's off, and bricks are started in separate processes as before. If multiplexing is enabled, then *compatible* bricks (mostly those with the same transport options) will be started in the same process.

Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb
BUG: 1385758
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://review.gluster.org/14763
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
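The compatibility test the message refers to lives in glusterd, not in this file. Purely as a hedged illustration of the idea (hypothetical helper and key names, not code from this patch, though it reuses the dict_get_str() helper that appears elsewhere in this diff), a check along those lines could compare the transport-shaping options of two bricks:

/* Hypothetical sketch, not part of this patch: two bricks are
 * "multiplex compatible" when their transport-shaping options match.
 * The key list below is illustrative, not the real set. */
static gf_boolean_t
bricks_compatible (dict_t *opts_a, dict_t *opts_b)
{
        char *keys[] = { "transport-type",
                         "transport.socket.ssl-enabled",
                         NULL };
        char *val_a  = NULL;
        char *val_b  = NULL;
        int   i      = 0;

        for (i = 0; keys[i]; ++i) {
                val_a = val_b = NULL;
                (void) dict_get_str (opts_a, keys[i], &val_a);
                (void) dict_get_str (opts_b, keys[i], &val_b);
                if (!val_a != !val_b)
                        return _gf_false;   /* set on one side only */
                if (val_a && strcmp (val_a, val_b) != 0)
                        return _gf_false;   /* values differ */
        }
        return _gf_true;
}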
Diffstat (limited to 'xlators/protocol/server/src/server.c')
-rw-r--r--  xlators/protocol/server/src/server.c  | 171
1 file changed, 127 insertions(+), 44 deletions(-)
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index db2f06ad582..5be900a6db0 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -524,30 +524,30 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
*/
pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&trans->list, &conf->xprt_list);
- }
+ rpc_transport_ref (trans);
+ list_add_tail (&trans->list, &conf->xprt_list);
pthread_mutex_unlock (&conf->mutex);
break;
}
case RPCSVC_EVENT_DISCONNECT:
+
/* A DISCONNECT event could come without an ACCEPT event
* happening for this transport. This happens when the server is
* expecting encrypted connections but the client tries to
* connect unencrypted
*/
- if (list_empty (&trans->list))
+ if (list_empty (&trans->list)) {
break;
+ }
/* transport has to be removed from the list upon disconnect
* irrespective of whether lock self heal is off or on, since
* new transport will be created upon reconnect.
*/
pthread_mutex_lock (&conf->mutex);
- {
- list_del_init (&trans->list);
- }
+ list_del_init (&trans->list);
+ rpc_transport_unref (trans);
pthread_mutex_unlock (&conf->mutex);
client = trans->xl_private;
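The hunk above pairs membership on conf->xprt_list with its own reference: rpc_transport_ref() on insert, rpc_transport_unref() on removal, so a transport can no longer be freed while still linked. A self-contained sketch of that ownership rule with generic names (plain C, not GlusterFS code):

#include <pthread.h>
#include <stdlib.h>

/* The list owns one reference to every linked node, so dropping a
 * node's last external reference cannot free it while another
 * thread still sees it on the list. */
struct xprt {
        int          refs;   /* protected by lock */
        struct xprt *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct xprt    *xprt_list;

static void
xprt_unref_locked (struct xprt *x)
{
        if (--x->refs == 0)
                free (x);
}

static void
xprt_list_add (struct xprt *x)
{
        pthread_mutex_lock (&lock);
        x->refs++;                       /* mirrors rpc_transport_ref() */
        x->next = xprt_list;
        xprt_list = x;
        pthread_mutex_unlock (&lock);
}

static void
xprt_list_del (struct xprt *x)
{
        struct xprt **pp = NULL;

        pthread_mutex_lock (&lock);
        for (pp = &xprt_list; *pp; pp = &(*pp)->next) {
                if (*pp == x) {
                        *pp = x->next;
                        xprt_unref_locked (x);  /* mirrors rpc_transport_unref() */
                        break;
                }
        }
        pthread_mutex_unlock (&lock);
}

int
main (void)
{
        struct xprt *x = calloc (1, sizeof (*x));

        x->refs = 1;              /* creator's reference */
        xprt_list_add (x);        /* refs == 2 */
        xprt_list_del (x);        /* refs == 1, creator still holds it */
        xprt_unref_locked (x);    /* refs == 0, freed (single-threaded here) */
        return 0;
}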
@@ -667,6 +667,8 @@ _delete_auth_opt (dict_t *this, char *key, data_t *value, void *data)
{
char *auth_option_pattern[] = { "auth.addr.*.allow",
"auth.addr.*.reject",
+ "auth.login.*.allow",
+ "auth.login.*.password",
"auth.login.*.ssl-allow",
NULL};
int i = 0;
@@ -687,6 +689,8 @@ _copy_auth_opt (dict_t *unused, char *key, data_t *value, void *xl_dict)
{
char *auth_option_pattern[] = { "auth.addr.*.allow",
"auth.addr.*.reject",
+ "auth.login.*.allow",
+ "auth.login.*.password",
"auth.login.*.ssl-allow",
NULL};
int i = 0;
@@ -729,15 +733,19 @@ out:
}
int
-server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t old,
- int32_t new)
+server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t new)
{
- if (old == new)
- return 0;
+ struct event_pool *pool = this->ctx->event_pool;
+ int target;
+ target = new + pool->auto_thread_count;
conf->event_threads = new;
- return event_reconfigure_threads (this->ctx->event_pool,
- conf->event_threads);
+
+ if (target == pool->eventthreadcount) {
+ return 0;
+ }
+
+ return event_reconfigure_threads (pool, target);
}
int
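server_check_event_threads() now compares against the pool's live thread count rather than the previously configured value, because the pool can also grow on its own through auto_thread_count. A toy calculation with made-up numbers (the real fields live in struct event_pool):

#include <stdio.h>

int
main (void)
{
        int configured   = 2;   /* the "event-threads" option */
        int auto_threads = 3;   /* pool->auto_thread_count, assumed value */
        int running      = 5;   /* pool->eventthreadcount, assumed value */
        int target       = configured + auto_threads;

        if (target == running)
                printf ("no-op: pool already at %d threads\n", target);
        else
                printf ("reconfigure pool from %d to %d threads\n",
                        running, target);
        return 0;
}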
@@ -748,6 +756,7 @@ reconfigure (xlator_t *this, dict_t *options)
rpcsvc_t *rpc_conf;
rpcsvc_listener_t *listeners;
rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
int inode_lru_limit;
gf_boolean_t trace;
data_t *data;
@@ -755,6 +764,19 @@ reconfigure (xlator_t *this, dict_t *options)
char *statedump_path = NULL;
int32_t new_nthread = 0;
char *auth_path = NULL;
+ char *xprt_path = NULL;
+ xlator_t *oldTHIS;
+ xlator_t *kid;
+
+ /*
+ * Since we're not a fop, we can't really count on THIS being set
+ * correctly, and it needs to be or else GF_OPTION_RECONF won't work
+ * (because it won't find our options list). This is another thing
+ * that "just happened" to work before multiplexing, but now we need to
+ * handle it more explicitly.
+ */
+ oldTHIS = THIS;
+ THIS = this;
conf = this->private;
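THIS is libglusterfs's thread-local "current translator" pointer, and option macros such as GF_OPTION_RECONF resolve their state through it. A generic, self-contained illustration of the save/set/restore discipline using plain C11 thread-local storage (stand-in names, not the real macro):

#include <stdio.h>

static _Thread_local const char *current_xl = "(none)";

static void
option_lookup (void)
{
        /* Stand-in for GF_OPTION_RECONF: helpers find their context
         * through the thread-local, not through a parameter. */
        printf ("resolving options against %s\n", current_xl);
}

static void
reconfigure_sketch (const char *xl_name)
{
        const char *old = current_xl;   /* save */

        current_xl = xl_name;           /* set for the duration */
        option_lookup ();
        current_xl = old;               /* restore on every exit path */
}

int
main (void)
{
        reconfigure_sketch ("patchy-server");
        return 0;
}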
@@ -764,6 +786,19 @@ reconfigure (xlator_t *this, dict_t *options)
goto out;
}
+ /*
+ * For some of the auth/rpc stuff, we need to operate on the correct
+ * child, but for other stuff we need to operate on the server
+ * translator itself.
+ */
+ kid = NULL;
+ if (dict_get_str (options, "auth-path", &auth_path) == 0) {
+ kid = get_xlator_by_name (this, auth_path);
+ }
+ if (!kid) {
+ kid = this;
+ }
+
if (dict_get_int32 ( options, "inode-lru-limit", &inode_lru_limit) == 0){
conf->inode_lru_limit = inode_lru_limit;
gf_msg_trace (this->name, 0, "Reconfigured inode-lru-limit to "
@@ -795,48 +830,50 @@ reconfigure (xlator_t *this, dict_t *options)
}
GF_OPTION_RECONF ("statedump-path", statedump_path,
- options, path, out);
+ options, path, do_auth);
if (!statedump_path) {
gf_msg (this->name, GF_LOG_ERROR, 0,
PS_MSG_STATEDUMP_PATH_ERROR,
"Error while reconfiguring statedump path");
ret = -1;
- goto out;
+ goto do_auth;
}
gf_path_strip_trailing_slashes (statedump_path);
GF_FREE (this->ctx->statedump_path);
this->ctx->statedump_path = gf_strdup (statedump_path);
+do_auth:
if (!conf->auth_modules)
conf->auth_modules = dict_new ();
dict_foreach (options, get_auth_types, conf->auth_modules);
- ret = validate_auth_options (this, options);
+ ret = validate_auth_options (kid, options);
if (ret == -1) {
/* logging already done in validate_auth_options function. */
goto out;
}
- dict_foreach (this->options, _delete_auth_opt, this->options);
- dict_foreach (options, _copy_auth_opt, this->options);
+ dict_foreach (kid->options, _delete_auth_opt, NULL);
+ dict_foreach (options, _copy_auth_opt, kid->options);
- ret = gf_auth_init (this, conf->auth_modules);
+ ret = gf_auth_init (kid, conf->auth_modules);
if (ret) {
dict_unref (conf->auth_modules);
goto out;
}
GF_OPTION_RECONF ("manage-gids", conf->server_manage_gids, options,
- bool, out);
+ bool, do_rpc);
GF_OPTION_RECONF ("gid-timeout", conf->gid_cache_timeout, options,
- int32, out);
+ int32, do_rpc);
if (gid_cache_reconf (&conf->gid_cache, conf->gid_cache_timeout) < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_GRP_CACHE_ERROR,
"Failed to reconfigure group cache.");
- goto out;
+ goto do_rpc;
}
+do_rpc:
rpc_conf = conf->rpc;
if (!rpc_conf) {
gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR,
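Note the shift from goto out to staged labels in the hunk above: a failure now records ret and falls through to the next independent stage instead of abandoning the whole reconfigure. A distilled sketch of that control-flow shape (generic stage names, simplified from the real function):

#include <stdio.h>

static int
stage (const char *name, int fail)
{
        printf ("%s: %s\n", name, fail ? "failed" : "ok");
        return fail ? -1 : 0;
}

static int
reconfigure_shape (int bad_statedump)
{
        int ret = 0;

        if (stage ("statedump", bad_statedump) != 0) {
                ret = -1;
                goto do_auth;   /* previously: goto out */
        }

do_auth:
        if (stage ("auth", 0) != 0) {
                ret = -1;
                goto do_rpc;
        }

do_rpc:
        (void) stage ("rpc", 0);

        return ret;   /* the first failure is still reported */
}

int
main (void)
{
        return reconfigure_shape (0);
}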
@@ -857,7 +894,14 @@ reconfigure (xlator_t *this, dict_t *options)
if (conf->dync_auth) {
pthread_mutex_lock (&conf->mutex);
{
- list_for_each_entry (xprt, &conf->xprt_list, list) {
+ /*
+ * Disconnecting will (usually) drop the last ref,
+ * which will cause the transport to be unlinked and
+ * freed while we're still traversing, which will cause
+ * us to crash unless we use list_for_each_entry_safe.
+ */
+ list_for_each_entry_safe (xprt, xp_next,
+ &conf->xprt_list, list) {
/* check for client authorization */
if (!xprt->clnt_options) {
/* If clnt_options dictionary is null,
@@ -871,25 +915,28 @@ reconfigure (xlator_t *this, dict_t *options)
*/
continue;
}
+ /*
+ * Make sure we're only operating on
+ * connections that are relevant to the brick
+ * we're reconfiguring.
+ */
+ if (dict_get_str (xprt->clnt_options,
+ "remote-subvolume",
+ &xprt_path) != 0) {
+ continue;
+ }
+ if (strcmp (xprt_path, auth_path) != 0) {
+ continue;
+ }
ret = gf_authenticate (xprt->clnt_options,
- options, conf->auth_modules);
+ options,
+ conf->auth_modules);
if (ret == AUTH_ACCEPT) {
- gf_msg (this->name, GF_LOG_TRACE, 0,
+ gf_msg (kid->name, GF_LOG_TRACE, 0,
PS_MSG_CLIENT_ACCEPTED,
"authorized client, hence we "
"continue with this connection");
} else {
- ret = dict_get_str (this->options,
- "auth-path",
- &auth_path);
- if (ret) {
- gf_msg (this->name,
- GF_LOG_WARNING, 0,
- PS_MSG_DICT_GET_FAILED,
- "failed to get "
- "auth-path");
- auth_path = NULL;
- }
gf_event (EVENT_CLIENT_AUTH_REJECT,
"client_uid=%s;"
"client_identifier=%s;"
@@ -932,15 +979,21 @@ reconfigure (xlator_t *this, dict_t *options)
}
}
+ /*
+ * Let the event subsystem know that we're auto-scaling, with an
+ * initial count of one.
+ */
+ ((struct event_pool *)(this->ctx->event_pool))->auto_thread_count = 1;
+
GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out);
- ret = server_check_event_threads (this, conf, conf->event_threads,
- new_nthread);
+ ret = server_check_event_threads (this, conf, new_nthread);
if (ret)
goto out;
ret = server_init_grace_timer (this, options, conf);
out:
+ THIS = oldTHIS;
gf_msg_debug ("", 0, "returning %d", ret);
return ret;
}
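Both transport walks touched by this patch (the one above and the one added in notify() below) use list_for_each_entry_safe because the loop body can drop the transport's last reference. The plain walker computes the next element from the current one after the body runs; the _safe walker caches it first. In kernel-style list macros (GlusterFS's list.h follows the same shape; paraphrased here, not copied from it) the difference is:

/* Unsafe: reads pos->member.next *after* the body may have freed pos. */
#define list_for_each_entry(pos, head, member)                          \
        for (pos = list_entry ((head)->next, typeof (*pos), member);    \
             &pos->member != (head);                                    \
             pos = list_entry (pos->member.next, typeof (*pos), member))

/* Safe: next element is cached in n before the body runs. */
#define list_for_each_entry_safe(pos, n, head, member)                  \
        for (pos = list_entry ((head)->next, typeof (*pos), member),    \
             n = list_entry (pos->member.next, typeof (*pos), member);  \
             &pos->member != (head);                                    \
             pos = n, n = list_entry (n->member.next, typeof (*pos), member))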
@@ -1001,8 +1054,7 @@ init (xlator_t *this)
/* Set event threads to the configured default */
GF_OPTION_INIT("event-threads", conf->event_threads, int32, out);
- ret = server_check_event_threads (this, conf, STARTING_EVENT_THREADS,
- conf->event_threads);
+ ret = server_check_event_threads (this, conf, conf->event_threads);
if (ret)
goto out;
@@ -1183,9 +1235,13 @@ init (xlator_t *this)
}
}
#endif
- this->private = conf;
+ FIRST_CHILD(this)->volfile_id
+ = gf_strdup (this->ctx->cmd_args.volfile_id);
+
+ this->private = conf;
ret = 0;
+
out:
if (ret) {
if (this != NULL) {
@@ -1350,6 +1406,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
int ret = -1;
server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
conf = this->private;
@@ -1413,6 +1471,31 @@ notify (xlator_t *this, int32_t event, void *data, ...)
}
+ case GF_EVENT_TRANSPORT_CLEANUP:
+ conf = this->private;
+ pthread_mutex_lock (&conf->mutex);
+ /*
+ * Disconnecting will (usually) drop the last ref, which will
+ * cause the transport to be unlinked and freed while we're
+ * still traversing, which will cause us to crash unless we use
+ * list_for_each_entry_safe.
+ */
+ list_for_each_entry_safe (xprt, xp_next,
+ &conf->xprt_list, list) {
+ if (!xprt->xl_private) {
+ continue;
+ }
+ if (xprt->xl_private->bound_xl == data) {
+ gf_log (this->name, GF_LOG_INFO,
+ "disconnecting %s",
+ xprt->peerinfo.identifier);
+ rpc_transport_disconnect (xprt, _gf_false);
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ /* NB: do *not* propagate anywhere else */
+ break;
+
default:
default_notify (this, event, data);
break;
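With multiplexing, one server translator fronts many bricks, so a cleanup aimed at a single brick (identified by data, the brick's bound_xl) is absorbed here; the trailing comment forbids propagation, presumably because forwarding it would reach translators that sibling bricks still depend on. A skeleton of the contract this notify() now implements (structure only, details elided):

/* Skeleton only: consume the per-brick cleanup, forward everything else. */
int
notify (xlator_t *this, int32_t event, void *data, ...)
{
        switch (event) {
        case GF_EVENT_TRANSPORT_CLEANUP:
                /* disconnect only transports whose bound_xl == data,
                 * then swallow the event: do *not* propagate it */
                break;
        default:
                default_notify (this, event, data);
                break;
        }
        return 0;
}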
@@ -1568,12 +1651,12 @@ struct volume_options options[] = {
{ .key = {"event-threads"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 32,
- .default_value = "2",
+ .max = 1024,
+ .default_value = "1",
.description = "Specifies the number of event threads to execute "
"in parallel. Larger values would help process"
" responses faster, depending on available processing"
- " power. Range 1-32 threads."
+ " power."
},
{ .key = {"dynamic-auth"},
.type = GF_OPTION_TYPE_BOOL,