summaryrefslogtreecommitdiffstats
path: root/xlators/protocol/server/src
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2016-12-08 16:24:15 -0500
committerVijay Bellur <vbellur@redhat.com>2017-01-30 19:13:58 -0500
commit1a95fc3036db51b82b6a80952f0908bc2019d24a (patch)
treeb983ac196a8165d5cb5e860a5ef97d3e9a41b5c9 /xlators/protocol/server/src
parent7f7d7a939e46b330a084d974451eee4757ba61b4 (diff)
core: run many bricks within one glusterfsd process
This patch adds support for multiple brick translator stacks running in a single brick server process. This reduces our per-brick memory usage by approximately 3x, and our appetite for TCP ports even more. It also creates potential to avoid process/thread thrashing, and to improve QoS by scheduling more carefully across the bricks, but realizing that potential will require further work. Multiplexing is controlled by the "cluster.brick-multiplex" global option. By default it's off, and bricks are started in separate processes as before. If multiplexing is enabled, then *compatible* bricks (mostly those with the same transport options) will be started in the same process. Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb BUG: 1385758 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: https://review.gluster.org/14763 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/protocol/server/src')
-rw-r--r--xlators/protocol/server/src/server-handshake.c152
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c5
-rw-r--r--xlators/protocol/server/src/server.c171
3 files changed, 197 insertions, 131 deletions
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index a33efb8..249dde7 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -36,27 +36,6 @@ gf_compare_client_version (rpcsvc_request_t *req, int fop_prognum,
return ret;
}
-void __check_and_set (xlator_t *each, void *data)
-{
- if (!strcmp (each->name,
- ((struct __get_xl_struct *) data)->name))
- ((struct __get_xl_struct *) data)->reply = each;
-}
-
-static xlator_t *
-get_xlator_by_name (xlator_t *some_xl, const char *name)
-{
- struct __get_xl_struct get = {
- .name = name,
- .reply = NULL
- };
-
- xlator_foreach (some_xl, __check_and_set, &get);
-
- return get.reply;
-}
-
-
int
_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
{
@@ -426,13 +405,14 @@ server_setvolume (rpcsvc_request_t *req)
int32_t ret = -1;
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
- int32_t fop_version = 0;
- int32_t mgmt_version = 0;
uint32_t lk_version = 0;
char *buf = NULL;
gf_boolean_t cancelled = _gf_false;
uint32_t opversion = 0;
rpc_transport_t *xprt = NULL;
+ int32_t fop_version = 0;
+ int32_t mgmt_version = 0;
+
params = dict_new ();
reply = dict_new ();
@@ -446,32 +426,6 @@ server_setvolume (rpcsvc_request_t *req)
this = req->svc->xl;
- config_params = dict_copy_with_ref (this->options, NULL);
- conf = this->private;
-
- if (conf->parent_up == _gf_false) {
- /* PARENT_UP indicates that all xlators in graph are inited
- * successfully
- */
- op_ret = -1;
- op_errno = EAGAIN;
-
- ret = dict_set_str (reply, "ERROR",
- "xlator graph in server is not initialised "
- "yet. Try again later");
- if (ret < 0)
- gf_msg_debug (this->name, 0, "failed to set error: "
- "xlator graph in server is not "
- "initialised yet. Try again later");
- goto fail;
- }
-
- ret = dict_set_int32 (reply, "child_up", conf->child_up);
- if (ret < 0)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' "
- "in the reply dict");
-
buf = memdup (args.dict.dict_val, args.dict.dict_len);
if (buf == NULL) {
op_ret = -1;
@@ -497,6 +451,65 @@ server_setvolume (rpcsvc_request_t *req)
params->extra_free = buf;
buf = NULL;
+ ret = dict_get_str (params, "remote-subvolume", &name);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "No remote-subvolume option specified");
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ xl = get_xlator_by_name (this, name);
+ if (xl == NULL) {
+ ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
+ name);
+ if (-1 == ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_ASPRINTF_FAILED,
+ "asprintf failed while setting error msg");
+ goto fail;
+ }
+ ret = dict_set_dynstr (reply, "ERROR", msg);
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "failed to set error "
+ "msg");
+
+ op_ret = -1;
+ op_errno = ENOENT;
+ goto fail;
+ }
+
+ config_params = dict_copy_with_ref (xl->options, NULL);
+ conf = this->private;
+
+ if (conf->parent_up == _gf_false) {
+ /* PARENT_UP indicates that all xlators in graph are inited
+ * successfully
+ */
+ op_ret = -1;
+ op_errno = EAGAIN;
+
+ ret = dict_set_str (reply, "ERROR",
+ "xlator graph in server is not initialised "
+ "yet. Try again later");
+ if (ret < 0)
+ gf_msg_debug (this->name, 0, "failed to set error: "
+ "xlator graph in server is not "
+ "initialised yet. Try again later");
+ goto fail;
+ }
+
+ ret = dict_set_int32 (reply, "child_up", conf->child_up);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' "
+ "in the reply dict");
+
ret = dict_get_str (params, "process-uuid", &client_uid);
if (ret < 0) {
ret = dict_set_str (reply, "ERROR",
@@ -603,39 +616,6 @@ server_setvolume (rpcsvc_request_t *req)
goto fail;
}
- ret = dict_get_str (params, "remote-subvolume", &name);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "No remote-subvolume option specified");
- if (ret < 0)
- gf_msg_debug (this->name, 0, "failed to set error "
- "msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- xl = get_xlator_by_name (this, name);
- if (xl == NULL) {
- ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
- name);
- if (-1 == ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- PS_MSG_ASPRINTF_FAILED,
- "asprintf failed while setting error msg");
- goto fail;
- }
- ret = dict_set_dynstr (reply, "ERROR", msg);
- if (ret < 0)
- gf_msg_debug (this->name, 0, "failed to set error "
- "msg");
-
- op_ret = -1;
- op_errno = ENOENT;
- goto fail;
- }
-
if (conf->verify_volfile) {
ret = dict_get_uint32 (params, "volfile-checksum", &checksum);
if (ret == 0) {
@@ -850,7 +830,13 @@ fail:
dict_unref (params);
dict_unref (reply);
- dict_unref (config_params);
+ if (config_params) {
+ /*
+ * This might be null if we couldn't even find the translator
+ * (brick) to copy it from.
+ */
+ dict_unref (config_params);
+ }
GF_FREE (buf);
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
index 0a5497f..5bb40a7 100644
--- a/xlators/protocol/server/src/server-rpc-fops.c
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -3385,10 +3385,8 @@ server_compound_resume (call_frame_t *frame, xlator_t *bound_xl)
int length = 0;
int op_errno = ENOMEM;
compound_req *c_req = NULL;
- xlator_t *this = NULL;
state = CALL_STATE (frame);
- this = frame->this;
if (state->resolve.op_ret != 0) {
ret = state->resolve.op_ret;
@@ -3422,8 +3420,7 @@ server_compound_resume (call_frame_t *frame, xlator_t *bound_xl)
}
STACK_WIND (frame, server_compound_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->compound,
+ bound_xl, bound_xl->fops->compound,
args, state->xdata);
return 0;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index db2f06a..5be900a 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -524,30 +524,30 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
*/
pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&trans->list, &conf->xprt_list);
- }
+ rpc_transport_ref (trans);
+ list_add_tail (&trans->list, &conf->xprt_list);
pthread_mutex_unlock (&conf->mutex);
break;
}
case RPCSVC_EVENT_DISCONNECT:
+
/* A DISCONNECT event could come without an ACCEPT event
* happening for this transport. This happens when the server is
* expecting encrypted connections by the client tries to
* connect unecnrypted
*/
- if (list_empty (&trans->list))
+ if (list_empty (&trans->list)) {
break;
+ }
/* transport has to be removed from the list upon disconnect
* irrespective of whether lock self heal is off or on, since
* new transport will be created upon reconnect.
*/
pthread_mutex_lock (&conf->mutex);
- {
- list_del_init (&trans->list);
- }
+ list_del_init (&trans->list);
+ rpc_transport_unref (trans);
pthread_mutex_unlock (&conf->mutex);
client = trans->xl_private;
@@ -667,6 +667,8 @@ _delete_auth_opt (dict_t *this, char *key, data_t *value, void *data)
{
char *auth_option_pattern[] = { "auth.addr.*.allow",
"auth.addr.*.reject",
+ "auth.login.*.allow",
+ "auth.login.*.password",
"auth.login.*.ssl-allow",
NULL};
int i = 0;
@@ -687,6 +689,8 @@ _copy_auth_opt (dict_t *unused, char *key, data_t *value, void *xl_dict)
{
char *auth_option_pattern[] = { "auth.addr.*.allow",
"auth.addr.*.reject",
+ "auth.login.*.allow",
+ "auth.login.*.password",
"auth.login.*.ssl-allow",
NULL};
int i = 0;
@@ -729,15 +733,19 @@ out:
}
int
-server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t old,
- int32_t new)
+server_check_event_threads (xlator_t *this, server_conf_t *conf, int32_t new)
{
- if (old == new)
- return 0;
+ struct event_pool *pool = this->ctx->event_pool;
+ int target;
+ target = new + pool->auto_thread_count;
conf->event_threads = new;
- return event_reconfigure_threads (this->ctx->event_pool,
- conf->event_threads);
+
+ if (target == pool->eventthreadcount) {
+ return 0;
+ }
+
+ return event_reconfigure_threads (pool, target);
}
int
@@ -748,6 +756,7 @@ reconfigure (xlator_t *this, dict_t *options)
rpcsvc_t *rpc_conf;
rpcsvc_listener_t *listeners;
rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
int inode_lru_limit;
gf_boolean_t trace;
data_t *data;
@@ -755,6 +764,19 @@ reconfigure (xlator_t *this, dict_t *options)
char *statedump_path = NULL;
int32_t new_nthread = 0;
char *auth_path = NULL;
+ char *xprt_path = NULL;
+ xlator_t *oldTHIS;
+ xlator_t *kid;
+
+ /*
+ * Since we're not a fop, we can't really count on THIS being set
+ * correctly, and it needs to be or else GF_OPTION_RECONF won't work
+ * (because it won't find our options list). This is another thing
+ * that "just happened" to work before multiplexing, but now we need to
+ * handle it more explicitly.
+ */
+ oldTHIS = THIS;
+ THIS = this;
conf = this->private;
@@ -764,6 +786,19 @@ reconfigure (xlator_t *this, dict_t *options)
goto out;
}
+ /*
+ * For some of the auth/rpc stuff, we need to operate on the correct
+ * child, but for other stuff we need to operate on the server
+ * translator itself.
+ */
+ kid = NULL;
+ if (dict_get_str (options, "auth-path", &auth_path) == 0) {
+ kid = get_xlator_by_name (this, auth_path);
+ }
+ if (!kid) {
+ kid = this;
+ }
+
if (dict_get_int32 ( options, "inode-lru-limit", &inode_lru_limit) == 0){
conf->inode_lru_limit = inode_lru_limit;
gf_msg_trace (this->name, 0, "Reconfigured inode-lru-limit to "
@@ -795,48 +830,50 @@ reconfigure (xlator_t *this, dict_t *options)
}
GF_OPTION_RECONF ("statedump-path", statedump_path,
- options, path, out);
+ options, path, do_auth);
if (!statedump_path) {
gf_msg (this->name, GF_LOG_ERROR, 0,
PS_MSG_STATEDUMP_PATH_ERROR,
"Error while reconfiguring statedump path");
ret = -1;
- goto out;
+ goto do_auth;
}
gf_path_strip_trailing_slashes (statedump_path);
GF_FREE (this->ctx->statedump_path);
this->ctx->statedump_path = gf_strdup (statedump_path);
+do_auth:
if (!conf->auth_modules)
conf->auth_modules = dict_new ();
dict_foreach (options, get_auth_types, conf->auth_modules);
- ret = validate_auth_options (this, options);
+ ret = validate_auth_options (kid, options);
if (ret == -1) {
/* logging already done in validate_auth_options function. */
goto out;
}
- dict_foreach (this->options, _delete_auth_opt, this->options);
- dict_foreach (options, _copy_auth_opt, this->options);
+ dict_foreach (kid->options, _delete_auth_opt, NULL);
+ dict_foreach (options, _copy_auth_opt, kid->options);
- ret = gf_auth_init (this, conf->auth_modules);
+ ret = gf_auth_init (kid, conf->auth_modules);
if (ret) {
dict_unref (conf->auth_modules);
goto out;
}
GF_OPTION_RECONF ("manage-gids", conf->server_manage_gids, options,
- bool, out);
+ bool, do_rpc);
GF_OPTION_RECONF ("gid-timeout", conf->gid_cache_timeout, options,
- int32, out);
+ int32, do_rpc);
if (gid_cache_reconf (&conf->gid_cache, conf->gid_cache_timeout) < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_GRP_CACHE_ERROR,
"Failed to reconfigure group cache.");
- goto out;
+ goto do_rpc;
}
+do_rpc:
rpc_conf = conf->rpc;
if (!rpc_conf) {
gf_msg (this->name, GF_LOG_ERROR, 0, PS_MSG_RPC_CONF_ERROR,
@@ -857,7 +894,14 @@ reconfigure (xlator_t *this, dict_t *options)
if (conf->dync_auth) {
pthread_mutex_lock (&conf->mutex);
{
- list_for_each_entry (xprt, &conf->xprt_list, list) {
+ /*
+ * Disconnecting will (usually) drop the last ref,
+ * which will cause the transport to be unlinked and
+ * freed while we're still traversing, which will cause
+ * us to crash unless we use list_for_each_entry_safe.
+ */
+ list_for_each_entry_safe (xprt, xp_next,
+ &conf->xprt_list, list) {
/* check for client authorization */
if (!xprt->clnt_options) {
/* If clnt_options dictionary is null,
@@ -871,25 +915,28 @@ reconfigure (xlator_t *this, dict_t *options)
*/
continue;
}
+ /*
+ * Make sure we're only operating on
+ * connections that are relevant to the brick
+ * we're reconfiguring.
+ */
+ if (dict_get_str (xprt->clnt_options,
+ "remote-subvolume",
+ &xprt_path) != 0) {
+ continue;
+ }
+ if (strcmp (xprt_path, auth_path) != 0) {
+ continue;
+ }
ret = gf_authenticate (xprt->clnt_options,
- options, conf->auth_modules);
+ options,
+ conf->auth_modules);
if (ret == AUTH_ACCEPT) {
- gf_msg (this->name, GF_LOG_TRACE, 0,
+ gf_msg (kid->name, GF_LOG_TRACE, 0,
PS_MSG_CLIENT_ACCEPTED,
"authorized client, hence we "
"continue with this connection");
} else {
- ret = dict_get_str (this->options,
- "auth-path",
- &auth_path);
- if (ret) {
- gf_msg (this->name,
- GF_LOG_WARNING, 0,
- PS_MSG_DICT_GET_FAILED,
- "failed to get "
- "auth-path");
- auth_path = NULL;
- }
gf_event (EVENT_CLIENT_AUTH_REJECT,
"client_uid=%s;"
"client_identifier=%s;"
@@ -932,15 +979,21 @@ reconfigure (xlator_t *this, dict_t *options)
}
}
+ /*
+ * Let the event subsystem know that we're auto-scaling, with an
+ * initial count of one.
+ */
+ ((struct event_pool *)(this->ctx->event_pool))->auto_thread_count = 1;
+
GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out);
- ret = server_check_event_threads (this, conf, conf->event_threads,
- new_nthread);
+ ret = server_check_event_threads (this, conf, new_nthread);
if (ret)
goto out;
ret = server_init_grace_timer (this, options, conf);
out:
+ THIS = oldTHIS;
gf_msg_debug ("", 0, "returning %d", ret);
return ret;
}
@@ -1001,8 +1054,7 @@ init (xlator_t *this)
/* Set event threads to the configured default */
GF_OPTION_INIT("event-threads", conf->event_threads, int32, out);
- ret = server_check_event_threads (this, conf, STARTING_EVENT_THREADS,
- conf->event_threads);
+ ret = server_check_event_threads (this, conf, conf->event_threads);
if (ret)
goto out;
@@ -1183,9 +1235,13 @@ init (xlator_t *this)
}
}
#endif
- this->private = conf;
+ FIRST_CHILD(this)->volfile_id
+ = gf_strdup (this->ctx->cmd_args.volfile_id);
+
+ this->private = conf;
ret = 0;
+
out:
if (ret) {
if (this != NULL) {
@@ -1350,6 +1406,8 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
int ret = -1;
server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
conf = this->private;
@@ -1413,6 +1471,31 @@ notify (xlator_t *this, int32_t event, void *data, ...)
}
+ case GF_EVENT_TRANSPORT_CLEANUP:
+ conf = this->private;
+ pthread_mutex_lock (&conf->mutex);
+ /*
+ * Disconnecting will (usually) drop the last ref, which will
+ * cause the transport to be unlinked and freed while we're
+ * still traversing, which will cause us to crash unless we use
+ * list_for_each_entry_safe.
+ */
+ list_for_each_entry_safe (xprt, xp_next,
+ &conf->xprt_list, list) {
+ if (!xprt->xl_private) {
+ continue;
+ }
+ if (xprt->xl_private->bound_xl == data) {
+ gf_log (this->name, GF_LOG_INFO,
+ "disconnecting %s",
+ xprt->peerinfo.identifier);
+ rpc_transport_disconnect (xprt, _gf_false);
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
+ /* NB: do *not* propagate anywhere else */
+ break;
+
default:
default_notify (this, event, data);
break;
@@ -1568,12 +1651,12 @@ struct volume_options options[] = {
{ .key = {"event-threads"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 32,
- .default_value = "2",
+ .max = 1024,
+ .default_value = "1",
.description = "Specifies the number of event threads to execute "
"in parallel. Larger values would help process"
" responses faster, depending on available processing"
- " power. Range 1-32 threads."
+ " power."
},
{ .key = {"dynamic-auth"},
.type = GF_OPTION_TYPE_BOOL,