diff options
| author | Mohit Agrawal <moagrawa@redhat.com> | 2017-06-22 16:57:04 +0530 | 
|---|---|---|
| committer | Atin Mukherjee <amukherj@redhat.com> | 2017-06-27 10:57:55 +0000 | 
| commit | b71059960f8c67d9a058244d2a1c748be4fe1323 (patch) | |
| tree | 78c0ee3b75c543fc0548d8ecf2d29b21939e89b7 | |
| parent | 4700c5be55b0e567755b4c8a1a91f33d29c06e6b (diff) | |
glusterd: brick process fails to restart after gluster pod failure
Problem: In a container environment, sometimes after deleting a gluster pod
         and creating a new gluster pod, the brick process doesn't seem
         to come up.
Solution: On the basis of the logs, it seems glusterd is trying to attach
          to a non-glusterfs process. Change the code of function
          glusterd_get_sock_from_brick_pid to fetch the socket path from the
          arguments of the running brick process.
BUG: 1464072
Change-Id: Ida6af00066341b683bbb4440d7a0d8042581656a
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://review.gluster.org/17601
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 41 | 
1 files changed, 31 insertions, 10 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index af14187cafe..f0c12315160 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5345,10 +5345,11 @@ find_compatible_brick (glusterd_conf_t *conf,  }  /* Below function is use to populate sockpath based on passed pid -   value as a argument after check the value from proc +   value as a argument after check the value from proc and also +   check if passed pid is match with running  glusterfs process  */ -void +int  glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)  {          char fname[128] = {0,}; @@ -5361,6 +5362,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)          char   *brptr   = NULL;          char tmpsockpath[PATH_MAX] = {0,};          size_t blen    = 0; +        int    ret     = -1;          this = THIS;          GF_ASSERT (this); @@ -5370,7 +5372,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)          if (sys_access (fname , R_OK) != 0) {                  gf_log (this->name, GF_LOG_ERROR,                           "brick process %d is not running", pid); -                return; +                return ret;          }          fd = open(fname, O_RDONLY); @@ -5380,7 +5382,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)                  gf_log (this->name, GF_LOG_ERROR,                           "open failed %s to open a file %s", strerror (errno),                                                                fname); -                return; +                return ret;          }          /* convert cmdline to single string */ @@ -5399,10 +5401,18 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)          cmdline[j] = '\0';          if (fd)                  sys_close(fd); +        if (!strstr (cmdline, "glusterfs")) +                return ret; -        ptr =   
strstr(cmdline, "-S "); -        ptr =   strchr(ptr, '/'); +        ptr = strstr(cmdline, "-S "); +        if (!ptr) +                return ret; +        ptr = strchr(ptr, '/'); +        if (!ptr) +                return ret;          brptr = strstr(ptr, "--brick-name"); +        if (!brptr) +                return ret;          i = 0;          while (ptr < brptr) { @@ -5413,8 +5423,10 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)          if (tmpsockpath[0]) {                  strncpy (sockpath, tmpsockpath , i); +                ret = 0;          } +        return ret;  } @@ -5477,22 +5489,31 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,                           * same port (on another brick) and re-use that.                           * TBD: re-use RPC connection across bricks                           */ -                        if (is_brick_mx_enabled ()) -                                glusterd_get_sock_from_brick_pid (pid, socketpath, -                                                                  sizeof(socketpath)); -                        else +                        if (is_brick_mx_enabled ()) { +                                ret = glusterd_get_sock_from_brick_pid (pid, socketpath, +                                                                        sizeof(socketpath)); +                                if (ret) { +                                        gf_log (this->name, GF_LOG_DEBUG, +                                                "Either pid %d is not running or is not match" +                                                " with any running brick process ", pid); +                                        goto run; +                                } +                        } else {                                  glusterd_set_brick_socket_filepath (volinfo, brickinfo,                                                                      socketpath,                                                      
                sizeof (socketpath)); +                        }                          gf_log (this->name, GF_LOG_DEBUG,                                  "Using %s as sockfile for brick %s of volume %s ",                                  socketpath, brickinfo->path, volinfo->volname); +                          (void) glusterd_brick_connect (volinfo, brickinfo,                                          socketpath);                  }                  return 0;          } +run:          ret = _mk_rundir_p (volinfo);          if (ret)                  goto out;  | 
