diff options
| author | Sanju Rakonde <srakonde@redhat.com> | 2018-02-21 12:46:25 +0530 | 
|---|---|---|
| committer | Atin Mukherjee <amukherj@redhat.com> | 2018-03-28 04:27:18 +0000 | 
| commit | a60fc2ddc03134fb23c5ed5c0bcb195e1649416b (patch) | |
| tree | c4f42085b4c6c6761bf4f3d23a24b8821cf292a8 | |
| parent | 95601229b8318f015a19d7eff89f73853b684a49 (diff) | |
glusterd: handling brick termination in brick-mux
Problem: There's a race between the last glusterfs_handle_terminate()
response sent to glusterd and the kill that happens immediately if the
terminated brick is the last brick.
Solution: When it is a last brick for the brick process, instead of glusterfsd
killing itself, glusterd will kill the process in case of brick multiplexing.
And also changing gf_attach utility accordingly.
Change-Id: I386c19ca592536daa71294a13d9fc89a26d7e8c0
fixes: bz#1545048
BUG: 1545048
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
| -rw-r--r-- | glusterfsd/src/gf_attach.c | 41 | ||||
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 33 | ||||
| -rw-r--r-- | tests/bugs/glusterd/stale-brick-proc-brick-mux.t | 32 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-pmap.c | 4 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.c | 17 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 56 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 3 | 
7 files changed, 147 insertions, 39 deletions
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c index 3f248292ddf..0eb4868263b 100644 --- a/glusterfsd/src/gf_attach.c +++ b/glusterfsd/src/gf_attach.c @@ -11,6 +11,9 @@  #include <stdio.h>  #include <stdlib.h>  #include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h>  //#include "config.h"  #include "glusterfs.h" @@ -23,6 +26,7 @@  int done = 0;  int rpc_status; +glfs_t *fs;  struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {          [GLUSTERD_BRICK_NULL] = {"NULL", NULL }, @@ -71,11 +75,43 @@ my_notify (struct rpc_clnt *rpc, void *mydata,  }  int32_t -my_callback (struct rpc_req *req, struct iovec *iov, int count, void *frame) +my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)  { +        gd1_mgmt_brick_op_rsp     rsp; +        dict_t          *dict   = NULL; +        pid_t            pid    = -1; +        int              ret    = -1; +        xlator_t        *this   = NULL; + +        this = fs->ctx->master; +        memset (&rsp, 0, sizeof (rsp)); + +        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + +        if (ret < 0) { +                fprintf (stderr, "xdr decoding failed\n"); +                goto out; +        } +        GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, +                                      (rsp.output.output_val), +                                      (rsp.output.output_len), +                                      ret, rsp.op_errno, out); +        if (dict) { +                if (dict_get_int32 (dict, "last_brick_terminated", &pid) == 0) { +                        int status = 0; + +                        gf_log ("gf_attach", GF_LOG_INFO, "Killing %d", pid); +                        kill (pid, SIGTERM); +                        waitpid (pid, &status, 0); +                } +                dict_unref (dict); +        } +          rpc_status = req->rpc_status;          done = 1; -        return 0; +        ret = 0; +out: +        return ret;  }  /* copied from gd_syncop_submit_request */ @@ -170,7 +206,6 @@ usage (char *prog)  int  main (int argc, char *argv[])  { -        glfs_t                  *fs;          struct rpc_clnt         *rpc;          dict_t                  *options;          int                     ret; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index d2b39494e51..c4df275077f 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -159,21 +159,31 @@ out:  }  int -glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret) +glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret, +                                   gf_boolean_t last_brick)  {          gd1_mgmt_brick_op_rsp   rsp = {0,};          dict_t                  *dict = NULL; -        int                     ret = 0; +        int                     ret = -1;          rsp.op_ret = op_ret;          rsp.op_errno = 0;          rsp.op_errstr = "";          dict = dict_new (); -        if (dict) +        if (dict) { +                /* Setting the last_brick_terminated key in dictionary is +                 * required to for standalone gf_attach utility to work. +                 * gf_attach utility will receive this dictionary and kill +                 * the process. +                 */ +                if (last_brick) { +                        ret = dict_set_int32 (dict, "last_brick_terminated", +                                              getpid()); +                }                  ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,                                                     &rsp.output.output_len); - +        }          if (ret == 0)                  ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL, @@ -262,6 +272,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)          xlator_t                *victim         = NULL;          xlator_list_t           **trav_p        = NULL;          gf_boolean_t            lockflag        = _gf_false; +        gf_boolean_t            last_brick      = _gf_false;          ret = xdr_to_generic (req->msg[0], &xlator_req,                                (xdrproc_t)xdr_gd1_mgmt_brick_op_req); @@ -294,17 +305,16 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)                   * make sure it's down and if it's already down that's                   * good enough.                   */ -                glusterfs_terminate_response_send (req, 0); +                glusterfs_terminate_response_send (req, 0, last_brick);                  goto err;          } -        glusterfs_terminate_response_send (req, 0);          if ((trav_p == &top->children) && !(*trav_p)->next) { -                gf_log (THIS->name, GF_LOG_INFO, -                        "terminating after loss of last child %s", -                        xlator_req.name); -                rpc_clnt_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name); -                kill (getpid(), SIGTERM); +                last_brick = _gf_true; +                glusterfs_terminate_response_send (req, 0, last_brick); +                gf_log (THIS->name, GF_LOG_INFO, "This is last brick of process." +                        "glusterD will kill the process and takes care of " +                        "removal of entries from port map register");          } else {                  /*                   * This is terribly unsafe without quiescing or shutting @@ -313,6 +323,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)                   *                   * TBD: finish implementing this "detach" code properly                   */ +                glusterfs_terminate_response_send (req, 0, last_brick);                  UNLOCK (&ctx->volfile_lock);                  lockflag = _gf_true;                  gf_log (THIS->name, GF_LOG_INFO, "detaching not-only" diff --git a/tests/bugs/glusterd/stale-brick-proc-brick-mux.t b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t new file mode 100644 index 00000000000..a3efe273898 --- /dev/null +++ b/tests/bugs/glusterd/stale-brick-proc-brick-mux.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../cluster.rc + +function count_brick_processes { +        pgrep glusterfsd | wc -l +} + +cleanup; + +TEST launch_cluster 2 +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +#bug-1549996 - stale brick processes on the nodes after volume deletion + +TEST $CLI_1 volume set all cluster.brick-multiplex on +TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}{1..3} $H2:$B2/${V0}{1..3} +TEST $CLI_1 volume start $V0 + +TEST $CLI_1 volume create $V1 replica 3 $H1:$B1/${V1}{1..3} $H2:$B2/${V1}{1..3} +TEST $CLI_1 volume start $V1 + +EXPECT 2 count_brick_processes + +TEST $CLI_1 volume stop $V0 +TEST $CLI_1 volume stop $V1 + +EXPECT 0 count_brick_processes + +cleanup diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 98b1aaa63af..8ef285bf48d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -557,6 +557,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)          glusterd_brickinfo_t *brickinfo            = NULL;          char                  pidfile[PATH_MAX]    = {0};          char                  brick_path[PATH_MAX] = {0,}; +        int                   kill_pid             = -1;          this = THIS;          GF_VALIDATE_OR_GOTO ("glusterd", this, fail); @@ -606,7 +607,8 @@ __gluster_pmap_signout (rpcsvc_request_t *req)                           * removed in the brick op phase. This situation would                           * arise when the brick is killed explicitly from the                           * backend */ -                        ret = glusterd_brick_process_remove_brick (brickinfo); +                        ret = glusterd_brick_process_remove_brick (brickinfo, +                                                                   &kill_pid);                          if (ret) {                                  gf_msg_debug (this->name, 0, "Couldn't remove "                                                "brick %s:%s from brick process", diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 31b08d76adc..e5d4421deb4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -958,7 +958,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,          gd1_mgmt_brick_op_req  *req  = NULL;          int                    ret  = 0;          xlator_t               *this = NULL; -        glusterd_brickinfo_t   *brickinfo = NULL;          this = THIS;          args.op_ret = -1; @@ -987,22 +986,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,                  else                          GF_FREE (args.errstr);          } -        if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) { -                if (args.op_ret == 0) { -                        brickinfo = pnode->node; -                        ret = glusterd_brick_process_remove_brick (brickinfo); -                        if (ret) { -                                gf_msg ("glusterd", GF_LOG_ERROR, 0, -                                        GD_MSG_BRICKPROC_REM_BRICK_FAILED, -                                        "Removing brick %s:%s from brick" -                                        " process failed", -                                        brickinfo->hostname, -                                        brickinfo->path); -                                args.op_ret = ret; -                                goto out; -                        } -                } -        }          if (GD_OP_STATUS_VOLUME == op) {                  ret = dict_set_int32 (args.dict, "index", pnode->index); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index af30756c947..8e71756b927 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -69,6 +69,7 @@  #include <sys/types.h>  #include <sys/ioctl.h>  #include <sys/socket.h> +#include <sys/wait.h>  #include <rpc/pmap_clnt.h>  #include <unistd.h>  #include <fnmatch.h> @@ -2313,7 +2314,8 @@ glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc)  }  int -glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, +                                     int *kill_pid)  {          int                      ret = -1;          xlator_t                *this = NULL; @@ -2352,6 +2354,7 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)                  /* If all bricks have been removed, delete the brick process */                  if (brick_proc->brick_count == 0) { +                        *kill_pid = 1;                          ret = glusterd_brickprocess_delete (brick_proc);                          if (ret)                                  goto out; @@ -2454,7 +2457,11 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,          glusterd_conf_t *conf                   = NULL;          int             ret                     = -1;          char            *op_errstr              = NULL; -        char            pidfile[PATH_MAX]       = {0,}; +        char            pidfile_path[PATH_MAX]  = {0,}; +        int             kill_pid                = -1; +        FILE            *pidfile                = NULL; +        pid_t           pid                     = -1; +        int             status                  = -1;          GF_ASSERT (volinfo);          GF_ASSERT (brickinfo); @@ -2467,7 +2474,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,          ret = 0; -        ret = glusterd_brick_process_remove_brick (brickinfo); +        ret = glusterd_brick_process_remove_brick (brickinfo, &kill_pid);          if (ret) {                  gf_msg_debug (this->name, 0, "Couldn't remove brick from"                                " brick process"); @@ -2510,10 +2517,47 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,                  ret = 0;          } -        GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); +        GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); +        if (kill_pid == 1 && is_brick_mx_enabled ()) { +                pidfile = fopen (pidfile_path, "r"); +                if (!pidfile) { +                        gf_msg (this->name, GF_LOG_ERROR, errno, +                                GD_MSG_FILE_OP_FAILED, +                                "Unable to open pidfile: %s", pidfile_path); +                        ret = -1; +                        goto out; +                } + +                ret = fscanf (pidfile, "%d", &pid); +                if (ret <= 0) { +                        gf_msg (this->name, GF_LOG_ERROR, errno, +                                GD_MSG_FILE_OP_FAILED, +                                "Unable to get pid of brick process"); +                        ret = -1; +                        goto out; +                } + +                if (conf->op_version >= GD_OP_VERSION_4_1_0) { +                        while (conf->blockers) { +                                synclock_unlock (&conf->big_lock); +                                sleep (1); +                                synclock_lock (&conf->big_lock); +                        } +                } +                gf_log (this->name, GF_LOG_INFO, +                        "terminating the brick process " +                        "%d after loss of last brick %s of the volume %s", +                        pid, brickinfo->path, volinfo->volname); +                kill (pid, SIGTERM); +                waitpid (pid, &status, 0); +                pmap_registry_remove (this, brickinfo->port, brickinfo->path, +                                      GF_PMAP_PORT_BRICKSERVER, NULL, +                                      _gf_true); +                ret = 0; +        } -        gf_msg_debug (this->name,  0, "Unlinking pidfile %s", pidfile); -        (void) sys_unlink (pidfile); +        gf_msg_debug (this->name,  0, "Unlinking pidfile %s", pidfile_path); +        (void) sys_unlink (pidfile_path);          brickinfo->status = GF_BRICK_STOPPED;          brickinfo->start_triggered = _gf_false; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 0e9e54a0687..8118c994974 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -182,7 +182,8 @@ glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,                                    glusterd_volinfo_t *volinfo);  int -glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, +                                     int *kill_pid);  int  glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess);  | 
