summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorAtin Mukherjee <amukherj@redhat.com>2018-08-07 10:25:49 +0530
committerAtin Mukherjee <amukherj@redhat.com>2018-08-09 02:52:57 +0000
commitbb9b8f61501cc633e585593de4d9f2fe5494d5ce (patch)
treee94af79c15c07c0165d7efadac609fdc0bc0d268 /xlators
parent12ce1848f55686cfae0b03f532279938722c0963 (diff)
glusterd: more stricter checks of if brick is running in multiplex mode
While gf_attach () utility can help in detaching a brick instance from the brick process which the kill_brick () function in tests/volume.rc uses it has a caveat which is as follows: 1. It doesn't ensure the respective brick is marked as stopped which glusterd does from glusterd_brick_stop 2. Sometimes if kill_brick () is executed just after a brick stack is up, the mgmt_rpc_notify () can take some time before marking priv->connected to 1 and before it if kill_brick () is executed, brick will fail to initiate the pmap_signout which would inturn cleans up the pidfile. To avoid such possibilities, a more stricter check on if a brick is running or not in brick multiplexing has been brought in now where it not only checks for its pid's existance but checks if the respective process has the brick instance associated with it before checking for brick's status. Change-Id: I98b92df949076663b9686add7aab4ec2f24ad5ab Fixes: bz#1595320 Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c71
1 files changed, 39 insertions, 32 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index bc119adf6eb..0ac075bcc87 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -6174,6 +6174,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
char *brickpath = NULL;
glusterd_volinfo_t *other_vol;
struct statvfs brickstat = {0,};
+ gf_boolean_t is_service_running = _gf_false;
this = THIS;
GF_ASSERT (this);
@@ -6240,8 +6241,39 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
brickinfo->path);
goto out;
}
-
- if (gf_is_service_running (pidfile, &pid)) {
+ is_service_running = gf_is_service_running (pidfile, &pid);
+ if (is_service_running) {
+ if (is_brick_mx_enabled ()) {
+ brickpath = search_brick_path_from_proc
+ (pid, brickinfo->path);
+ if (!brickpath) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Either pid %d is not running or brick"
+ " path %s is not consumed so cleanup pidfile",
+ pid, brickinfo->path);
+ /* brick isn't running,so unlink stale pidfile
+ * if any.
+ */
+ if (sys_access (pidfile , R_OK) == 0) {
+ sys_unlink (pidfile);
+ }
+ goto run;
+ }
+ GF_FREE (brickpath);
+ ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
+ sizeof(socketpath));
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Either pid %d is not running or does "
+ "not match with any running brick "
+ "processes", pid);
+ /* Fetch unix socket is failed so unlink pidfile */
+ if (sys_access (pidfile , R_OK) == 0) {
+ sys_unlink (pidfile);
+ }
+ goto run;
+ }
+ }
if (brickinfo->status != GF_BRICK_STARTING &&
brickinfo->status != GF_BRICK_STARTED) {
gf_log (this->name, GF_LOG_INFO,
@@ -6259,36 +6291,11 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
* same port (on another brick) and re-use that.
* TBD: re-use RPC connection across bricks
*/
- if (is_brick_mx_enabled ()) {
- brickpath = search_brick_path_from_proc (pid, brickinfo->path);
- if (!brickpath) {
- gf_log (this->name, GF_LOG_INFO,
- "Either pid %d is not running or brick"
- " path %s is not consumed so cleanup pidfile",
- pid, brickinfo->path);
- /* search brick is failed so unlink pidfile */
- if (sys_access (pidfile , R_OK) == 0) {
- sys_unlink (pidfile);
- }
- goto run;
- }
- GF_FREE (brickpath);
- ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
- sizeof(socketpath));
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Either pid %d is not running or is not match"
- " with any running brick process ", pid);
- /* Fetch unix socket is failed so unlink pidfile */
- if (sys_access (pidfile , R_OK) == 0) {
- sys_unlink (pidfile);
- }
- goto run;
- }
- } else {
- glusterd_set_brick_socket_filepath (volinfo, brickinfo,
- socketpath,
- sizeof (socketpath));
+ if (!is_brick_mx_enabled ()) {
+ glusterd_set_brick_socket_filepath
+ (volinfo, brickinfo,
+ socketpath,
+ sizeof (socketpath));
}
gf_log (this->name, GF_LOG_DEBUG,
"Using %s as sockfile for brick %s of volume %s ",