| field | value | date |
|---|---|---|
| author | Mohit Agrawal <moagrawa@redhat.com> | 2017-05-25 21:43:42 +0530 |
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-05-31 20:43:53 +0000 |
| commit | dba55ae364a2772904bb68a6bd0ea87289ee1470 (patch) | |
| tree | e8a7cf51bd45464cd26f9c4270787ffc50228854 /glusterfsd | |
| parent | de92c363c95d16966dbcc9d8763fd4448dd84d13 (diff) | |
glusterfs: Unable to mount a running volume after enabling brick mux and stopping any volume
Problem: With brick multiplexing enabled, if any volume is stopped and a mount
         of a still-running volume is then attempted, the mount command hangs.
Solution: With brick multiplexing enabled, the server shares a single
          server_conf data structure across all associated subvolumes. When any
          subvolume goes down in an ungraceful manner (e.g. by removing its
          brick directory), the posix xlator sends a GF_EVENT_CHILD_DOWN event
          to its parent xlators, and the server's notify handler sets child_up
          to false in the shared server_conf. When a client then tries to
          communicate with the server through a mount, the server checks
          conf->child_up, finds it FALSE, and reports "translator are not yet
          ready". This patch updates the server_conf structure to track
          child_up status per xlator. Another important correction in this
          patch is that server-side xlators now clean up their threads after a
          volume is stopped.
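A rough, standalone illustration of the per-xlator status idea described above: with one status entry per child, a downed brick no longer masks a healthy one. This is only a sketch; the struct and function names (child_status, server_state, find_child_status) are invented for the example and are not the actual server_conf layout.

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct child_status {
        const char *name;      /* brick / subvolume name          */
        bool        child_up;  /* did this child report CHILD_UP? */
};

struct server_state {
        struct child_status children[8];
        int                 count;
};

/* Look up the status entry for one specific child. */
static struct child_status *
find_child_status (struct server_state *state, const char *name)
{
        for (int i = 0; i < state->count; i++)
                if (strcmp (state->children[i].name, name) == 0)
                        return &state->children[i];
        return NULL;
}

/* A mount for "vol1" should succeed even though "vol0" went down. */
int
main (void)
{
        struct server_state state = {
                .children = { { "vol0", false }, { "vol1", true } },
                .count    = 2,
        };

        const char *wanted = "vol1";
        struct child_status *cs = find_child_status (&state, wanted);

        if (cs && cs->child_up)
                printf ("%s: translator ready, mount can proceed\n", wanted);
        else
                printf ("%s: translator not yet ready\n", wanted);
        return 0;
}
```

With a single shared flag, the vol0 failure above would have made the vol1 mount fail with "translator are not yet ready" as well, which is the hang described in the problem statement.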
BUG: 1453977
Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://review.gluster.org/17356
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
Diffstat (limited to 'glusterfsd')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 113 |

1 file changed, 58 insertions, 55 deletions
```diff
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 8ede110121b..c17bf3bb6fc 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,10 +198,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         ssize_t                 ret;
-        xlator_t                *top = NULL;
-        xlator_t                *victim = NULL;
-        glusterfs_ctx_t         *ctx    = NULL;
-        xlator_list_t           **trav_p;
+        glusterfs_ctx_t         *ctx            = NULL;
+        xlator_t                *top            = NULL;
+        xlator_t                *victim         = NULL;
+        xlator_list_t           **trav_p        = NULL;
+        gf_boolean_t            lockflag        = _gf_false;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -214,57 +215,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
         LOCK (&ctx->volfile_lock);
         {
                 /* Find the xlator_list_t that points to our victim. */
-                top = glusterfsd_ctx->active->first;
-                for (trav_p = &top->children; *trav_p;
-                     trav_p = &(*trav_p)->next) {
-                        victim = (*trav_p)->xlator;
-                        if (strcmp (victim->name, xlator_req.name) == 0) {
-                                break;
+                if (glusterfsd_ctx->active) {
+                        top = glusterfsd_ctx->active->first;
+                        for (trav_p = &top->children; *trav_p;
+                                                    trav_p = &(*trav_p)->next) {
+                                victim = (*trav_p)->xlator;
+                                if (strcmp (victim->name, xlator_req.name) == 0) {
+                                        break;
+                                }
                         }
                 }
-
-                if (!*trav_p) {
-                        gf_log (THIS->name, GF_LOG_ERROR,
-                                "can't terminate %s - not found",
-                                xlator_req.name);
-                        /*
-                         * Used to be -ENOENT.  However, the caller asked us to
-                         * make sure it's down and if it's already down that's
-                         * good enough.
-                         */
-                        glusterfs_terminate_response_send (req, 0);
-                        goto err;
-                }
-
+        }
+        if (!*trav_p) {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "can't terminate %s - not found",
+                          xlator_req.name);
+                /*
+                 * Used to be -ENOENT.  However, the caller asked us to
+                 * make sure it's down and if it's already down that's
+                 * good enough.
+                 */
                 glusterfs_terminate_response_send (req, 0);
-                if ((trav_p == &top->children) && !(*trav_p)->next) {
-                        gf_log (THIS->name, GF_LOG_INFO,
-                                "terminating after loss of last child %s",
-                                xlator_req.name);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-                        kill (getpid(), SIGTERM);
-                } else {
-                        /*
-                         * This is terribly unsafe without quiescing or shutting
-                         * things down properly but it gets us to the point
-                         * where we can test other stuff.
-                         *
-                         * TBD: finish implementing this "detach" code properly
-                         */
-                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
-                                " child %s", xlator_req.name);
-                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-
-                        *trav_p = (*trav_p)->next;
-                        glusterfs_autoscale_threads (THIS->ctx, -1);
-                }
+                goto err;
+        }
+        glusterfs_terminate_response_send (req, 0);
+        if ((trav_p == &top->children) && !(*trav_p)->next) {
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "terminating after loss of last child %s",
+                        xlator_req.name);
+                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+                kill (getpid(), SIGTERM);
+        } else {
+                /*
+                 * This is terribly unsafe without quiescing or shutting
+                 * things down properly but it gets us to the point
+                 * where we can test other stuff.
+                 *
+                 * TBD: finish implementing this "detach" code properly
+                 */
+                UNLOCK (&ctx->volfile_lock);
+                lockflag = _gf_true;
+                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                         " child %s", xlator_req.name);
+                top->notify (top, GF_EVENT_CLEANUP, victim);
         }
 err:
-        UNLOCK (&ctx->volfile_lock);
+        if (!lockflag)
+                UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
 }
@@ -838,6 +836,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         int32_t                 ret             = -1;
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         xlator_t                *this           = NULL;
+        xlator_t                *nextchild      = NULL;
         glusterfs_graph_t       *newgraph       = NULL;
         glusterfs_ctx_t         *ctx            = NULL;
 
@@ -862,15 +861,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                         gf_log (this->name, GF_LOG_INFO,
                                 "got attach for %s", xlator_req.name);
                         ret = glusterfs_graph_attach (this->ctx->active,
-                                                      xlator_req.name,
-                                                      &newgraph);
-                        if (ret == 0) {
-                                ret = glusterfs_graph_parent_up (newgraph);
+                                              xlator_req.name, &newgraph);
+                        if (!ret && (newgraph && newgraph->first)) {
+                                nextchild = newgraph->first;
+                                ret = xlator_notify (nextchild,
+                                                     GF_EVENT_PARENT_UP,
+                                                     nextchild);
                                 if (ret) {
-                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        gf_msg (this->name, GF_LOG_ERROR,
+                                                0,
                                                 LG_MSG_EVENT_NOTIFY_FAILED,
                                                 "Parent up notification "
-                                                "failed");
+                                                "failed for %s ",
+                                                nextchild->name);
                                         goto out;
                                 }
                                 glusterfs_autoscale_threads (this->ctx, 1);
```
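One detail of the terminate path above worth calling out is the lockflag handling: the volfile lock is released before the potentially slow cleanup notification on the detach path, and the error label unlocks only if the lock is still held. Below is a minimal sketch of that pattern using plain pthreads instead of the LOCK()/UNLOCK() macros; the names handle_terminate and notify_cleanup are invented stand-ins, not the real functions.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t volfile_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the cleanup notification sent to the child xlator. */
static void
notify_cleanup (const char *victim)
{
        printf ("cleaning up child %s\n", victim);
}

static int
handle_terminate (const char *victim, bool last_child)
{
        bool locked = true;        /* mirrors the lockflag variable */

        pthread_mutex_lock (&volfile_lock);

        if (victim == NULL)
                goto err;          /* lock still held, released below */

        if (!last_child) {
                /* Drop the lock before the slow cleanup notification and
                 * remember that we did, so the error label does not
                 * unlock a mutex we no longer hold. */
                pthread_mutex_unlock (&volfile_lock);
                locked = false;
                notify_cleanup (victim);
        }

err:
        if (!locked)
                return 0;
        pthread_mutex_unlock (&volfile_lock);
        return 0;
}

int
main (void)
{
        handle_terminate ("vol0-brick0", false);
        return 0;
}
```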
